From c9a918838c07cbef934c8ef818d8f0e719015c3a Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 7 Sep 2009 17:07:29 +0900 Subject: PCI: pcie, aer: checkpatch style cleanup in pcie/aer/* Before: drivers/pci/pcie/aer/aer_inject.c total: 4 errors, 4 warnings, 473 lines checked drivers/pci/pcie/aer/aerdrv.c total: 5 errors, 2 warnings, 333 lines checked drivers/pci/pcie/aer/aerdrv.h total: 1 errors, 0 warnings, 139 lines checked drivers/pci/pcie/aer/aerdrv_core.c total: 4 errors, 3 warnings, 872 lines checked drivers/pci/pcie/aer/aerdrv_errprint.c total: 12 errors, 11 warnings, 248 lines checked After: drivers/pci/pcie/aer/aer_inject.c total: 0 errors, 0 warnings, 466 lines checked drivers/pci/pcie/aer/aerdrv.c total: 0 errors, 0 warnings, 335 lines checked drivers/pci/pcie/aer/aerdrv.h total: 0 errors, 0 warnings, 139 lines checked drivers/pci/pcie/aer/aerdrv_core.c total: 0 errors, 0 warnings, 869 lines checked drivers/pci/pcie/aer/aerdrv_errprint.c total: 0 errors, 10 warnings, 247 lines checked Signed-off-by: Hidetoshi Seto Reviewed-by: Andrew Patterson Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/pci/pcie/aer/aerdrv.h') diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index bbd7428ca2d0..820ea73d25f7 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -21,7 +21,7 @@ #define AER_ERROR(d) (d & AER_ERROR_MASK) /* Root Error Status Register Bits */ -#define ROOT_ERR_STATUS_MASKS 0x0f +#define ROOT_ERR_STATUS_MASKS 0x0f #define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \ PCI_EXP_RTCTL_SENFEE| \ @@ -65,7 +65,7 @@ struct aer_err_info { int severity; /* 0:NONFATAL | 1:FATAL | 2:COR */ int flags; unsigned int status; /* COR/UNCOR Error Status */ - struct header_log_regs tlp; /* TLP Header */ + struct header_log_regs tlp; /* TLP Header */ }; struct aer_err_source { @@ -136,4 +136,4 @@ static inline int aer_osc_setup(struct pcie_device *pciedev) } #endif -#endif //_AERDRV_H_ +#endif /* _AERDRV_H_ */ -- cgit v1.2.3 From 0d90c3ac0bb89acfbf481c8b06749b00eade6545 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 7 Sep 2009 17:12:25 +0900 Subject: PCI: pcie, aer: refer mask state in mask register properly ERR_{,UN}CORRECTABLE_ERROR_MASK are set of error bits which linux know, set of PCI_ERR_COR_* and PCI_ERR_UNC_* defined in linux/pci_regs.h. This masks make aerdrv not to report errors of unknown bit, while aerdrv have ability to report such undefined errors as "Unknown Error Bit %2d". OTOH aerdrv_errprint does not have any check of setting in mask register. So it could report masked wrong error by finding bit in status without knowing that the bit is masked in the mask register. This patch changes aerdrv to use mask state in mask register propely instead of defined/hardcoded ERR_{,UN}CORRECTABLE_ERROR_MASK. This change prevents aerdrv from reporting masked error, and also enable reporting unknown errors. Signed-off-by: Hidetoshi Seto Reviewed-by: Andrew Patterson Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/pci/pcie/aer/aerdrv.h') diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 820ea73d25f7..0db530db9439 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -47,9 +47,6 @@ #define AER_TLP_HEADER_VALID_FLAG 0x00000001 #define AER_MULTI_ERROR_VALID_FLAG 0x00000002 -#define ERR_CORRECTABLE_ERROR_MASK 0x000031c1 -#define ERR_UNCORRECTABLE_ERROR_MASK 0x001ff010 - struct header_log_regs { unsigned int dw0; unsigned int dw1; @@ -65,6 +62,7 @@ struct aer_err_info { int severity; /* 0:NONFATAL | 1:FATAL | 2:COR */ int flags; unsigned int status; /* COR/UNCOR Error Status */ + unsigned int mask; /* COR/UNCOR Error Mask */ struct header_log_regs tlp; /* TLP Header */ }; -- cgit v1.2.3 From e7a0d92b19f438011ad76c41755b56ec2ef05f64 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 7 Sep 2009 17:13:42 +0900 Subject: PCI: pcie, aer: report multiple/first error on a device Multiple bits might be set in the Uncorrectable Error Status register. But aer_print_error_source() only report a error of the lowest bit set in the error status register. So print strings for all bits unmasked and set. And check First Error Pointer to mark the error occured first. This FEP is not valid when the corresponing bit of the Uncorrectable Error Status register is not set, or unimplemented or undefined. Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/pci/pcie/aer/aerdrv.h') diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 0db530db9439..436bf79e2739 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -61,6 +61,7 @@ struct aer_err_info { u16 id; int severity; /* 0:NONFATAL | 1:FATAL | 2:COR */ int flags; + int first; unsigned int status; /* COR/UNCOR Error Status */ unsigned int mask; /* COR/UNCOR Error Mask */ struct header_log_regs tlp; /* TLP Header */ -- cgit v1.2.3 From 3472a18773bc6661ea7f8de2b4172db7e00b67e6 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 7 Sep 2009 17:16:00 +0900 Subject: PCI: pcie, aer: remove unused macros Cleanup. Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/pci/pcie/aer/aerdrv.h') diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 436bf79e2739..c44d9e12d06e 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -16,9 +16,6 @@ #define AER_NONFATAL 0 #define AER_FATAL 1 #define AER_CORRECTABLE 2 -#define AER_UNCORRECTABLE 4 -#define AER_ERROR_MASK 0x001fffff -#define AER_ERROR(d) (d & AER_ERROR_MASK) /* Root Error Status Register Bits */ #define ROOT_ERR_STATUS_MASKS 0x0f -- cgit v1.2.3 From 273024ded7b364e1305a31bf4eb197870284f279 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 7 Sep 2009 17:16:20 +0900 Subject: PCI: pcie, aer: flags to bits Compact struct and codes. Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'drivers/pci/pcie/aer/aerdrv.h') diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index c44d9e12d06e..78c977cec479 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -40,10 +40,6 @@ PCI_ERR_UNC_UNX_COMP| \ PCI_ERR_UNC_MALF_TLP) -/* AER Error Info Flags */ -#define AER_TLP_HEADER_VALID_FLAG 0x00000001 -#define AER_MULTI_ERROR_VALID_FLAG 0x00000002 - struct header_log_regs { unsigned int dw0; unsigned int dw1; @@ -55,10 +51,17 @@ struct header_log_regs { struct aer_err_info { struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; int error_dev_num; - u16 id; - int severity; /* 0:NONFATAL | 1:FATAL | 2:COR */ - int flags; - int first; + + unsigned int id:16; + + unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ + unsigned int __pad1:5; + unsigned int multi_error_valid:1; + + unsigned int first_error:5; + unsigned int __pad2:2; + unsigned int tlp_header_valid:1; + unsigned int status; /* COR/UNCOR Error Status */ unsigned int mask; /* COR/UNCOR Error Mask */ struct header_log_regs tlp; /* TLP Header */ -- cgit v1.2.3 From 79e4b89be81b5e53bc4cb51788ca7a45cadb4ef3 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 7 Sep 2009 17:16:45 +0900 Subject: PCI: pcie, aer: change error print format Use dev_printk like format. Sample (real machine + dummy error injected by aer-inject): - Before: +------ PCI-Express Device Error ------+ Error Severity : Corrected PCIE Bus Error type : Data Link Layer Bad TLP : Receiver ID : 2800 VendorID=8086h, DeviceID=1096h, Bus=28h, Device=00h, Function=00h +------ PCI-Express Device Error ------+ Error Severity : Corrected PCIE Bus Error type : Data Link Layer Bad TLP : Bad DLLP : Receiver ID : 2801 VendorID=8086h, DeviceID=1096h, Bus=28h, Device=00h, Function=01h Error of this Agent(2801) is reported first - After: pcieport-driver 0000:00:02.0: AER: Multiple Corrected error received: id=2801 e1000e 0000:28:00.0: PCIE Bus Error: severity=Corrected, type=Data Link Layer, id=2800(Receiver ID) e1000e 0000:28:00.0: device [8086:1096] error status/mask=00000040/00000000 e1000e 0000:28:00.0: [ 6] Bad TLP e1000e 0000:28:00.1: PCIE Bus Error: severity=Corrected, type=Data Link Layer, id=2801(Receiver ID) e1000e 0000:28:00.1: device [8086:1096] error status/mask=000000c0/00000000 e1000e 0000:28:00.1: [ 6] Bad TLP e1000e 0000:28:00.1: [ 7] Bad DLLP e1000e 0000:28:00.1: Error of this Agent(2801) is reported first Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/pci/pcie/aer/aerdrv.h') diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 78c977cec479..f9979eb56fb2 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -124,6 +124,7 @@ extern void aer_delete_rootport(struct aer_rpc *rpc); extern int aer_init(struct pcie_device *dev); extern void aer_isr(struct work_struct *work); extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); +extern void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info); extern irqreturn_t aer_irq(int irq, void *context); #ifdef CONFIG_ACPI -- cgit v1.2.3 From b1c089b7caf18905bd1d87136cf7b8c837254932 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Mon, 7 Sep 2009 17:16:59 +0900 Subject: PCI: pcie, aer: report all error before recovery This patch is required not to lost error records by action invoked on error recovery, such as slot reset etc. Following sample (real machine + dummy record injected by aer-inject) shows that record of 28:00.1 could not be retrieved by recovery of 28:00.0: - Before: pcieport-driver 0000:00:02.0: AER: Multiple Uncorrected (Non-Fatal) error received: id=2801 e1000e 0000:28:00.0: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2800(Receiver ID) e1000e 0000:28:00.0: device [8086:1096] error status/mask=00001000/00100000 e1000e 0000:28:00.0: [12] Poisoned TLP (First) e1000e 0000:28:00.0: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.0: broadcast error_detected message e1000e 0000:28:00.0: broadcast slot_reset message e1000e 0000:28:00.0: setting latency timer to 64 e1000e 0000:28:00.0: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.1: setting latency timer to 64 e1000e 0000:28:00.1: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.0: broadcast resume message e1000e 0000:28:00.0: AER driver successfully recovered e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX - After: pcieport-driver 0000:00:02.0: AER: Multiple Uncorrected (Non-Fatal) error received: id=2801 e1000e 0000:28:00.0: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2800(Receiver ID) e1000e 0000:28:00.0: device [8086:1096] error status/mask=00001000/00100000 e1000e 0000:28:00.0: [12] Poisoned TLP (First) e1000e 0000:28:00.0: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.1: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2801(Receiver ID) e1000e 0000:28:00.1: device [8086:1096] error status/mask=00081000/00100000 e1000e 0000:28:00.1: [12] Poisoned TLP (First) e1000e 0000:28:00.1: [19] ECRC e1000e 0000:28:00.1: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.1: Error of this Agent(2801) is reported first e1000e 0000:28:00.0: broadcast error_detected message e1000e 0000:28:00.0: broadcast slot_reset message e1000e 0000:28:00.0: setting latency timer to 64 e1000e 0000:28:00.0: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.1: setting latency timer to 64 e1000e 0000:28:00.1: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.0: broadcast resume message e1000e 0000:28:00.0: AER driver successfully recovered e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/pci/pcie/aer/aerdrv.h') diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index f9979eb56fb2..bd833ea3ba49 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -29,8 +29,6 @@ #define ERR_COR_ID(d) (d & 0xffff) #define ERR_UNCOR_ID(d) (d >> 16) -#define AER_SUCCESS 0 -#define AER_UNSUCCESS 1 #define AER_ERROR_SOURCES_MAX 100 #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ -- cgit v1.2.3