From 6a45716abbf9dc0f397946306db1f78b2eba3086 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 2 May 2013 20:41:45 +0000 Subject: parisc: fix partly 16/64k PAGE_SIZE boot This patch fixes partly PAGE_SIZEs of 16K or 64K by adjusting the assembler PTE lookup code and the assembler TEMPALIAS code. Furthermore some data alignments for PAGE_SIZE have been limited to 4K (or less) to not waste too much memory with greater page sizes. As a side note, the palo loader can (currently) only handle up to 10 ELF segments which is fixed with tighter aligning as well. My testings indicated that the ldci command in the sba iommu coding needed adjustment by the PAGE_SHIFT value and that the I/O PDIR Page size was only set to 4K for my machine (C3000). All this fixes partly the boot, but there are still quite some caching problems left. Examples are e.g. the symbios logic driver which is failing: sym0: <896> rev 0x7 at pci 0000:00:0f.0 irq 69 sym0: PA-RISC Firmware, ID 7, Fast-40, SE, parity checking CACHE TEST FAILED: DMA error (dstat=0x81).sym0: CACHE INCORRECTLY CONFIGURED. and the tulip network driver which doesn't seem to work correctly either: Sending BOOTP requests .net eth0: Setting full-duplex based on MII#1 link partner capability of 05e1 ..... timed out! Beside those kernel fixes glibc will need fixes too to be able to handle >4K page sizes. Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/parisc/kernel/entry.S') diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index f33201bf8977..aa486e46ab97 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -400,7 +400,15 @@ #if PT_NLEVELS == 3 extru \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index #else +# if defined(CONFIG_64BIT) + extrd,u \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index + #else + # if PAGE_SIZE > 4096 + extru \va,31-ASM_PGDIR_SHIFT,32-ASM_PGDIR_SHIFT,\index + # else extru \va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index + # endif +# endif #endif dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */ copy %r0,\pte @@ -615,7 +623,7 @@ .text - .align PAGE_SIZE + .align 4096 ENTRY(fault_vector_20) /* First vector is invalid (0) */ -- cgit v1.2.3 From bbbfde782084b4f0d85ddffb88f1cf4650ff40e4 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 4 May 2013 19:16:41 +0000 Subject: parisc: use long branch in fork_like macro The "b" branch instruction used in the fork_like macro only can handle 17-bit pc-relative offsets. This fails with an out of range offset with some .config files. Rewrite to use the "be" instruction which can branch to any address in a space. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/parisc/kernel/entry.S') diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index aa486e46ab97..36f4f1dcb778 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -1702,7 +1702,8 @@ ENTRY(sys_\name\()_wrapper) ldo TASK_REGS(%r1),%r1 reg_save %r1 mfctl %cr27, %r28 - b sys_\name + ldil L%sys_\name, %r31 + be R%sys_\name(%sr4,%r31) STREG %r28, PT_CR27(%r1) ENDPROC(sys_\name\()_wrapper) .endm -- cgit v1.2.3 From c207a76bf155cb5cf24cf849c08f6555e9180594 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 7 May 2013 00:07:25 +0000 Subject: parisc: only re-enable interrupts if we need to schedule or deliver signals when returning to userspace Helge and I have found that we have a kernel stack overflow problem which causes a variety of random failures. Currently, we re-enable interrupts when returning from an external interrupt incase we need to schedule or delivery signals. As a result, a potentially unlimited number of interrupts can occur while we are running on the kernel stack. It is very limited in space (currently, 16k). This change defers enabling interrupts until we have actually decided to schedule or delivery signals. This only occurs when we about to return to userspace. This limits the number of interrupts on the kernel stack to one. In other cases, interrupts remain disabled until the final return from interrupt (rfi). Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/parisc/kernel/entry.S') diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 36f4f1dcb778..3f3326d876f7 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -833,11 +833,6 @@ ENTRY(syscall_exit_rfi) STREG %r19,PT_SR7(%r16) intr_return: - /* NOTE: Need to enable interrupts incase we schedule. */ - ssm PSW_SM_I, %r0 - -intr_check_resched: - /* check for reschedule */ mfctl %cr30,%r1 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ @@ -864,6 +859,11 @@ intr_check_sig: LDREG PT_IASQ1(%r16), %r20 cmpib,COND(=),n 0,%r20,intr_restore /* backward */ + /* NOTE: We need to enable interrupts if we have to deliver + * signals. We used to do this earlier but it caused kernel + * stack overflows. */ + ssm PSW_SM_I, %r0 + copy %r0, %r25 /* long in_syscall = 0 */ #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ @@ -915,6 +915,10 @@ intr_do_resched: cmpib,COND(=) 0, %r20, intr_do_preempt nop + /* NOTE: We need to enable interrupts if we schedule. We used + * to do this earlier but it caused kernel stack overflows. */ + ssm PSW_SM_I, %r0 + #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ #endif -- cgit v1.2.3 From 200c880420a2c02a0899120ce52d801fad705b90 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 7 May 2013 20:25:42 +0000 Subject: parisc: implement irq stacks Default kernel stack size on parisc is 16k. During tests we found that the kernel stack can easily grow beyond 13k, which leaves 3k left for irq processing. This patch adds the possibility to activate an additional stack of 16k per CPU which is being used during irq processing. This implementation does not yet uses this irq stack for the irq bh handler. The assembler code for call_on_stack was heavily cleaned up by John David Anglin. CC: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/parisc/kernel/entry.S') diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 3f3326d876f7..4bb96ad9b0b1 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -2010,6 +2010,47 @@ ftrace_stub: ENDPROC(return_to_handler) #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_IRQSTACKS +/* void call_on_stack(unsigned long param1, void *func, + unsigned long new_stack) */ +ENTRY(call_on_stack) + copy %sp, %r1 + + /* Regarding the HPPA calling conventions for function pointers, + we assume the PIC register is not changed across call. For + CONFIG_64BIT, the argument pointer is left to point at the + argument region allocated for the call to call_on_stack. */ +# ifdef CONFIG_64BIT + /* Switch to new stack. We allocate two 128 byte frames. */ + ldo 256(%arg2), %sp + /* Save previous stack pointer and return pointer in frame marker */ + STREG %rp, -144(%sp) + /* Calls always use function descriptor */ + LDREG 16(%arg1), %arg1 + bve,l (%arg1), %rp + STREG %r1, -136(%sp) + LDREG -144(%sp), %rp + bve (%rp) + LDREG -136(%sp), %sp +# else + /* Switch to new stack. We allocate two 64 byte frames. */ + ldo 128(%arg2), %sp + /* Save previous stack pointer and return pointer in frame marker */ + STREG %r1, -68(%sp) + STREG %rp, -84(%sp) + /* Calls use function descriptor if PLABEL bit is set */ + bb,>=,n %arg1, 30, 1f + depwi 0,31,2, %arg1 + LDREG 0(%arg1), %arg1 +1: + be,l 0(%sr4,%arg1), %sr0, %r31 + copy %r31, %rp + LDREG -84(%sp), %rp + bv (%rp) + LDREG -68(%sp), %sp +# endif /* CONFIG_64BIT */ +ENDPROC(call_on_stack) +#endif /* CONFIG_IRQSTACKS */ get_register: /* -- cgit v1.2.3 From f0a18819e261afc5fdbd8c5c6f9943123c5461ba Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 10 May 2013 23:21:38 +0000 Subject: parisc: fix SMP races when updating PTE and TLB entries in entry.S Currently, race conditions exist in the handling of TLB interruptions in entry.S. In particular, dirty bit updates can be lost if an accessed interruption occurs just after the dirty bit interruption on a different cpu. Lost dirty bit updates result in user pages not being flushed and general system instability. This change adds lock and unlock macros to synchronize all PTE and TLB updates done in entry.S. As a result, userspace stability is significantly improved. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 155 ++++++++++++++++++++++++--------------------- 1 file changed, 83 insertions(+), 72 deletions(-) (limited to 'arch/parisc/kernel/entry.S') diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 4bb96ad9b0b1..ae27cb6ce19a 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -452,9 +452,41 @@ L2_ptep \pgd,\pte,\index,\va,\fault .endm + /* Acquire pa_dbit_lock lock. */ + .macro dbit_lock spc,tmp,tmp1 +#ifdef CONFIG_SMP + cmpib,COND(=),n 0,\spc,2f + load32 PA(pa_dbit_lock),\tmp +1: LDCW 0(\tmp),\tmp1 + cmpib,COND(=) 0,\tmp1,1b + nop +2: +#endif + .endm + + /* Release pa_dbit_lock lock without reloading lock address. */ + .macro dbit_unlock0 spc,tmp +#ifdef CONFIG_SMP + or,COND(=) %r0,\spc,%r0 + stw \spc,0(\tmp) +#endif + .endm + + /* Release pa_dbit_lock lock. */ + .macro dbit_unlock1 spc,tmp +#ifdef CONFIG_SMP + load32 PA(pa_dbit_lock),\tmp + dbit_unlock0 \spc,\tmp +#endif + .endm + /* Set the _PAGE_ACCESSED bit of the PTE. Be clever and * don't needlessly dirty the cache line if it was already set */ - .macro update_ptep ptep,pte,tmp,tmp1 + .macro update_ptep spc,ptep,pte,tmp,tmp1 +#ifdef CONFIG_SMP + or,COND(=) %r0,\spc,%r0 + LDREG 0(\ptep),\pte +#endif ldi _PAGE_ACCESSED,\tmp1 or \tmp1,\pte,\tmp and,COND(<>) \tmp1,\pte,%r0 @@ -463,7 +495,11 @@ /* Set the dirty bit (and accessed bit). No need to be * clever, this is only used from the dirty fault */ - .macro update_dirty ptep,pte,tmp + .macro update_dirty spc,ptep,pte,tmp +#ifdef CONFIG_SMP + or,COND(=) %r0,\spc,%r0 + LDREG 0(\ptep),\pte +#endif ldi _PAGE_ACCESSED|_PAGE_DIRTY,\tmp or \tmp,\pte,\pte STREG \pte,0(\ptep) @@ -1111,11 +1147,13 @@ dtlb_miss_20w: L3_ptep ptp,pte,t0,va,dtlb_check_alias_20w - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1135,11 +1173,13 @@ nadtlb_miss_20w: L3_ptep ptp,pte,t0,va,nadtlb_check_alias_20w - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1161,7 +1201,8 @@ dtlb_miss_11: L2_ptep ptp,pte,t0,va,dtlb_check_alias_11 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot @@ -1172,6 +1213,7 @@ dtlb_miss_11: idtlbp prot,(%sr1,va) mtsp t0, %sr1 /* Restore sr1 */ + dbit_unlock1 spc,t0 rfir nop @@ -1192,7 +1234,8 @@ nadtlb_miss_11: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_11 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot @@ -1204,6 +1247,7 @@ nadtlb_miss_11: idtlbp prot,(%sr1,va) mtsp t0, %sr1 /* Restore sr1 */ + dbit_unlock1 spc,t0 rfir nop @@ -1224,13 +1268,15 @@ dtlb_miss_20: L2_ptep ptp,pte,t0,va,dtlb_check_alias_20 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot f_extend pte,t0 idtlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1250,13 +1296,15 @@ nadtlb_miss_20: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_20 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot f_extend pte,t0 idtlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1357,11 +1405,13 @@ itlb_miss_20w: L3_ptep ptp,pte,t0,va,itlb_fault - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1379,11 +1429,13 @@ naitlb_miss_20w: L3_ptep ptp,pte,t0,va,naitlb_check_alias_20w - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1405,7 +1457,8 @@ itlb_miss_11: L2_ptep ptp,pte,t0,va,itlb_fault - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot @@ -1416,6 +1469,7 @@ itlb_miss_11: iitlbp prot,(%sr1,va) mtsp t0, %sr1 /* Restore sr1 */ + dbit_unlock1 spc,t0 rfir nop @@ -1427,7 +1481,8 @@ naitlb_miss_11: L2_ptep ptp,pte,t0,va,naitlb_check_alias_11 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot @@ -1438,6 +1493,7 @@ naitlb_miss_11: iitlbp prot,(%sr1,va) mtsp t0, %sr1 /* Restore sr1 */ + dbit_unlock1 spc,t0 rfir nop @@ -1459,13 +1515,15 @@ itlb_miss_20: L2_ptep ptp,pte,t0,va,itlb_fault - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot f_extend pte,t0 iitlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1477,13 +1535,15 @@ naitlb_miss_20: L2_ptep ptp,pte,t0,va,naitlb_check_alias_20 - update_ptep ptp,pte,t0,t1 + dbit_lock spc,t0,t1 + update_ptep spc,ptp,pte,t0,t1 make_insert_tlb spc,pte,prot f_extend pte,t0 iitlbt pte,prot + dbit_unlock1 spc,t0 rfir nop @@ -1507,29 +1567,13 @@ dbit_trap_20w: L3_ptep ptp,pte,t0,va,dbit_fault -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nolock_20w - load32 PA(pa_dbit_lock),t0 - -dbit_spin_20w: - LDCW 0(t0),t1 - cmpib,COND(=) 0,t1,dbit_spin_20w - nop - -dbit_nolock_20w: -#endif - update_dirty ptp,pte,t1 + dbit_lock spc,t0,t1 + update_dirty spc,ptp,pte,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nounlock_20w - ldi 1,t1 - stw t1,0(t0) - -dbit_nounlock_20w: -#endif + dbit_unlock0 spc,t0 rfir nop @@ -1543,18 +1587,8 @@ dbit_trap_11: L2_ptep ptp,pte,t0,va,dbit_fault -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nolock_11 - load32 PA(pa_dbit_lock),t0 - -dbit_spin_11: - LDCW 0(t0),t1 - cmpib,= 0,t1,dbit_spin_11 - nop - -dbit_nolock_11: -#endif - update_dirty ptp,pte,t1 + dbit_lock spc,t0,t1 + update_dirty spc,ptp,pte,t1 make_insert_tlb_11 spc,pte,prot @@ -1565,13 +1599,7 @@ dbit_nolock_11: idtlbp prot,(%sr1,va) mtsp t1, %sr1 /* Restore sr1 */ -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nounlock_11 - ldi 1,t1 - stw t1,0(t0) - -dbit_nounlock_11: -#endif + dbit_unlock0 spc,t0 rfir nop @@ -1583,32 +1611,15 @@ dbit_trap_20: L2_ptep ptp,pte,t0,va,dbit_fault -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nolock_20 - load32 PA(pa_dbit_lock),t0 - -dbit_spin_20: - LDCW 0(t0),t1 - cmpib,= 0,t1,dbit_spin_20 - nop - -dbit_nolock_20: -#endif - update_dirty ptp,pte,t1 + dbit_lock spc,t0,t1 + update_dirty spc,ptp,pte,t1 make_insert_tlb spc,pte,prot f_extend pte,t1 idtlbt pte,prot - -#ifdef CONFIG_SMP - cmpib,COND(=),n 0,spc,dbit_nounlock_20 - ldi 1,t1 - stw t1,0(t0) - -dbit_nounlock_20: -#endif + dbit_unlock0 spc,t0 rfir nop -- cgit v1.2.3 From b63a2bbc0b9b106a93e11952ab057e2408f2eb02 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 20 May 2013 16:42:53 +0000 Subject: parisc: make interrupt and interruption stack allocation reentrant The get_stack_use_cr30 and get_stack_use_r30 macros allocate a stack frame for external interrupts and interruptions requiring a stack frame. They are currently not reentrant in that they save register context before the stack is set or adjusted. I have observed a number of system crashes where there was clear evidence of stack corruption during interrupt processing, and as a result register corruption. Some interruptions can still occur during interruption processing, however external interrupts are disabled and data TLB misses don't occur for absolute accesses. So, it's not entirely clear what triggers this issue. Also, if an interruption occurs when Q=0, it is generally not possible to recover as the shadowed registers are not copied. The attached patch reworks the get_stack_use_cr30 and get_stack_use_r30 macros to allocate stack before doing register saves. The new code is a couple of instructions shorter than the old implementation. Thus, it's an improvement even if it doesn't fully resolve the stack corruption issue. Based on limited testing, it improves SMP system stability. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/parisc/kernel/entry.S') diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index ae27cb6ce19a..e8f07dd28401 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -65,15 +65,11 @@ rsm PSW_SM_I, %r0 /* barrier for "Relied upon Translation */ mtsp %r0, %sr4 mtsp %r0, %sr5 - mfsp %sr7, %r1 - or,= %r0,%r1,%r0 /* Only save sr7 in sr3 if sr7 != 0 */ - mtsp %r1, %sr3 + mtsp %r0, %sr6 tovirt_r1 %r29 load32 KERNEL_PSW, %r1 rsm PSW_SM_QUIET,%r0 /* second "heavy weight" ctl op */ - mtsp %r0, %sr6 - mtsp %r0, %sr7 mtctl %r0, %cr17 /* Clear IIASQ tail */ mtctl %r0, %cr17 /* Clear IIASQ head */ mtctl %r1, %ipsw @@ -119,17 +115,20 @@ /* we save the registers in the task struct */ + copy %r30, %r17 mfctl %cr30, %r1 + ldo THREAD_SZ_ALGN(%r1), %r30 + mtsp %r0,%sr7 + mtsp %r16,%sr3 tophys %r1,%r9 LDREG TI_TASK(%r9), %r1 /* thread_info -> task_struct */ tophys %r1,%r9 ldo TASK_REGS(%r9),%r9 - STREG %r30, PT_GR30(%r9) + STREG %r17,PT_GR30(%r9) STREG %r29,PT_GR29(%r9) STREG %r26,PT_GR26(%r9) + STREG %r16,PT_SR7(%r9) copy %r9,%r29 - mfctl %cr30, %r1 - ldo THREAD_SZ_ALGN(%r1), %r30 .endm .macro get_stack_use_r30 @@ -137,10 +136,12 @@ /* we put a struct pt_regs on the stack and save the registers there */ tophys %r30,%r9 - STREG %r30,PT_GR30(%r9) + copy %r30,%r1 ldo PT_SZ_ALGN(%r30),%r30 + STREG %r1,PT_GR30(%r9) STREG %r29,PT_GR29(%r9) STREG %r26,PT_GR26(%r9) + STREG %r16,PT_SR7(%r9) copy %r9,%r29 .endm -- cgit v1.2.3