[parisc-linux] Mysterious hangs with parisc (a send_group_sig_info() analysis)

Joel Soete soete.joel at tiscali.be
Fri Jul 21 14:05:32 MDT 2006


Hello all,

Kyle McMartin wrote:
--- snip ---
> 
> James' fix only applied to UP builds. :)
> 
Continuing investigation on this:

BUG: soft lockup detected on CPU#0!
Backtrace:
  [<0000000010112e70>] dump_stack+0x18/0x28
  [<0000000010176088>] softlockup_tick+0x120/0x160
  [<00000000101533b0>] run_local_timers+0x28/0x38
  [<0000000010153c48>] update_process_times+0x58/0xd8
  [<000000001011d898>] smp_do_timer+0x70/0x80
  [<0000000010114094>] timer_interrupt+0xd4/0x1e0
  [<0000000010176204>] handle_IRQ_event+0x74/0xd0
  [<0000000010176318>] __do_IRQ+0xb8/0x270
  [<0000000010114a6c>] do_cpu_irq_mask+0x11c/0x1e8
  [<0000000010104074>] intr_return+0x0/0x1c

smp n4k pb.

I figure out that with the kernel 2.6.17-pa3 compiled with gcc-3.3 this system run my stress test during 8 full days without pb.
Otoh when the same src were compiled with gcc-4.x the system vanished (panicing?) eracticaly after between  1 to 3 tree days of test.

So either gcc-3.3 produced the right stuff or hiden a bug?
So I managed to produce *.s files for gcc-3.3 and gcc-4.1 to attempt comparison.

But this Backtrace didn't show me any cpu status (even after I updated the firmware of pdc and gsp to their latest releases and 
tried the latest mingo's patch for lock validator?). Even thought Mike recalled me this old report: 
<http://lists.parisc-linux.org/pipermail/parisc-linux/2006-March/028603.html>
  IAOQ[0]: _read_lock+0x18/0x30
  IAOQ[1]: _read_lock+0x8/0x30
  RP(r2): send_group_sig_info+0x3c/0xb0

I so used this as starting point of investigation:
--- snip ---
/*
  * This is the entry point for "process-wide" signals.
  * They will go to an appropriate thread in the thread group.
  */
int
send_group_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
         int ret;
         read_lock(&tasklist_lock);
         ret = group_send_sig_info(sig, info, p);
         read_unlock(&tasklist_lock);
         return ret;
}
--- snip ---

./include/linux/spinlock.h:
--- snip ---
#define read_lock(lock)                      _read_lock(lock)

/*
  * We inline the unlock functions in the nondebug case:
  */
#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
# define spin_unlock(lock)              _spin_unlock(lock)
# define read_unlock(lock)              _read_unlock(lock)
# define write_unlock(lock)             _write_unlock(lock)
#else
# define spin_unlock(lock)              __raw_spin_unlock(&(lock)->raw_lock)
# define read_unlock(lock)              __raw_read_unlock(&(lock)->raw_lock)
# define write_unlock(lock)             __raw_write_unlock(&(lock)->raw_lock)
#endif

--- snip ---

Q1: according to this comment why don't we 'inline' read_lock() as read_unlock?

Lets have a look to the pre-compiled (.i) file:
precompiled:
--- snip ---
int
send_group_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
  int ret;
  _read_lock(&tasklist_lock);
  ret = group_send_sig_info(sig, info, p);
  __raw_read_unlock(&(&tasklist_lock)->raw_lock);
  return ret;
}
--- snip ---
static __inline__ __attribute__((always_inline)) void __raw_read_unlock(raw_rwlock_t *rw)
{
  __raw_spin_lock_flags(&rw->lock, 0);

  rw->counter--;

  __raw_spin_unlock(&rw->lock);
}
--- snip ---
static inline __attribute__((always_inline)) void __raw_spin_lock_flags(raw_spinlock_t *x,
       unsigned long flags)
{
  volatile unsigned int *a;


  __asm__ __volatile__("":::"memory");
  a = ((volatile unsigned int *)x);

  while (({ unsigned __ret; __asm__ __volatile__("ldcw,co" " 0(%1),%0" : "=r" (__ret) : "r" (a)); __ret; }) == 0)
   __asm__("; flag_0");
   while (*a == 0)
    if (flags & 0x00000001) {
     __asm__ __volatile__("ssm %0,%%r0\n" : : "i" (0x00000001) : "memory" );
     __asm__ __volatile__("":::"memory");
     __asm__ __volatile__("rsm %0,%%r0\n" : : "i" (0x00000001) : "memory" );
    } else
     __asm__ __volatile__("; cpu_relax\n\tnop\n\tnop\n" : : );

  __asm__ __volatile__("":::"memory");
}
--- snip ---

to be sure am i on the right place and make some asm highlighting:
  # diff -Nau spinlock.h.Orig spinlock.h
--- spinlock.h.Orig     2006-07-20 07:54:26.000000000 +0200
+++ spinlock.h  2006-07-21 19:37:16.000000000 +0200
@@ -20,8 +20,10 @@
  {
         volatile unsigned int *a;

-       mb();
+/*     mb(); */
+       __asm__ __volatile__("; raw_spin_lock_flags_in":::"memory");
         a = __ldcw_align(x);
+
         while (__ldcw(a) == 0)
                 while (*a == 0)
                         if (flags & PSW_SM_I) {
@@ -30,16 +32,19 @@
                                 local_irq_disable();
                         } else
                                 cpu_relax();
-       mb();
+/*     mb(); */
+       __asm__ __volatile__("; raw_spin_lock_flags_out":::"memory");
  }

  static inline void __raw_spin_unlock(raw_spinlock_t *x)
  {
         volatile unsigned int *a;
-       mb();
+/*     mb(); */
+       __asm__ __volatile__("; raw_spin_unlock_in":::"memory");
         a = __ldcw_align(x);
         *a = 1;
-       mb();
+/*     mb(); */
+       __asm__ __volatile__("; raw_spin_unlock_out":::"memory");
  }

  static inline int __raw_spin_trylock(raw_spinlock_t *x)

--- processor.h.Orig    2006-07-20 07:54:46.000000000 +0200
+++ processor.h 2006-07-20 08:14:30.000000000 +0200
@@ -355,7 +355,11 @@
  }
  #endif

+/*
  #define cpu_relax()    barrier()
+ */
+#define cpu_relax()    __asm__ __volatile__("; cpu_relax\n\tnop\n\tnop\n" : : )
+

  #endif /* __ASSEMBLY__ */


which obvioulsy produced different s files:
  # diff -y SGSI_gcc33.s.ban_noFlag0 SGSI_gcc41.s.ban_noFlag1
         .section        .text.send_group_sig_info,"ax", at progb           .section        .text.send_group_sig_info,"ax", at progb
         .align 8                                                        .align 8
.globl send_group_sig_info                                      .globl send_group_sig_info
         .type   send_group_sig_info, @function                          .type   send_group_sig_info, @function
send_group_sig_info:                                            send_group_sig_info:
         .PROC                                                           .PROC
         .CALLINFO FRAME=144,CALLS,SAVE_RP,ENTRY_GR=6          |         .CALLINFO FRAME=160,CALLS,SAVE_RP,ENTRY_GR=7
         .ENTRY                                                          .ENTRY
                                                               >         addil LT'tasklist_lock,%r27
         std %r2,-16(%r30)                                               std %r2,-16(%r30)
         std,ma %r7,144(%r30)                                  |         std,ma %r8,160(%r30)
         std %r6,-136(%r30)                                    |         copy %r24,%r8
         std %r5,-128(%r30)                                    <
         std %r4,-120(%r30)                                    <
         copy %r27,%r4                                         <
         ldo -48(%r30),%r29                                              ldo -48(%r30),%r29
         copy %r25,%r6                                         |         std %r7,-152(%r30)
         extrd,s %r26,63,32,%r5                                |         copy %r25,%r7
         addil LT'tasklist_lock,%r27                           |         std %r6,-144(%r30)
         ldd RT'tasklist_lock(%r1),%r1                         |         extrd,s %r26,63,32,%r6
         copy %r1,%r26                                         |         std %r5,-136(%r30)
                                                               >         ldd RT'tasklist_lock(%r1),%r5
                                                               >         copy %r5,%r26
                                                               >         std %r4,-128(%r30)
         b,l _read_lock,%r2                                              b,l _read_lock,%r2
         copy %r24,%r7                                         |         copy %r27,%r4
         copy %r6,%r25                                         <
         copy %r7,%r24                                         <
         copy %r4,%r27                                                   copy %r4,%r27
                                                               >         copy %r6,%r26
                                                               >         copy %r27,%r4
                                                               >         copy %r7,%r25
         ldo -48(%r30),%r29                                              ldo -48(%r30),%r29
         b,l group_send_sig_info,%r2                                     b,l group_send_sig_info,%r2
         copy %r5,%r26                                         |         copy %r8,%r24
         addil LT'tasklist_lock,%r4                            |         copy %r4,%r27
         ldd RT'tasklist_lock(%r1),%r20                        |         copy %r28,%r19
#APP                                                            #APP
         ; raw_spin_lock_flags_in                                        ; raw_spin_lock_flags_in
         ldcw,co 0(%r20),%r19                                  <
#NO_APP                                                         #NO_APP
         cmpib,<>,n 0,%r19,.L895                               | .L944:
.L890:                                                        | #APP
         ldw 0(%r20),%r19                                      |         ldcw,co 0(%r5),%r28
         cmpib,<>,n 0,%r19,.L897                               | #NO_APP
.L889:                                                        |         cmpib,<>,n 0,%r28,.L946
                                                               > .L945:
                                                               >         ldw 0(%r5),%r28
                                                               >         cmpb,*<> %r0,%r28,.L944
                                                               >         nop
#APP                                                            #APP
         ; cpu_relax                                                     ; cpu_relax
         nop                                                             nop
         nop                                                             nop

#NO_APP                                                         #NO_APP
         ldw 0(%r20),%r19                                      |         b,n .L945
         cmpib,= 0,%r19,.L889                                  | .L946:
         nop                                                   <
.L897:                                                        <
#APP                                                          <
         ldcw,co 0(%r20),%r19                                  <
#NO_APP                                                       <
         cmpib,= 0,%r19,.L890                                  <
         nop                                                   <
.L895:                                                        <
#APP                                                            #APP
         ; raw_spin_lock_flags_out                                       ; raw_spin_lock_flags_out
#NO_APP                                                         #NO_APP
         ldw 4(%r20),%r19                                      |         addil LT'tasklist_lock,%r27
         ldo -1(%r19),%r19                                     |         ldd RT'tasklist_lock(%r1),%r31
         stw %r19,4(%r20)                                      |         ldw 4(%r31),%r28
                                                               >         ldo -1(%r28),%r28
                                                               >         stw %r28,4(%r31)
#APP                                                            #APP
         ; raw_spin_unlock_in                                            ; raw_spin_unlock_in
#NO_APP                                                         #NO_APP
         ldi 1,%r19                                            |         ldi 1,%r28
         stw %r19,0(%r20)                                      |         stw %r28,0(%r5)
#APP                                                            #APP
         ; raw_spin_unlock_out                                           ; raw_spin_unlock_out
#NO_APP                                                         #NO_APP
         ldd -120(%r30),%r4                                    |         copy %r19,%r28
         ldd -128(%r30),%r5                                    |         ldd -176(%r30),%r2
         ldd -160(%r30),%r2                                    |         ldd -152(%r30),%r7
         ldd -136(%r30),%r6                                    |         ldd -144(%r30),%r6
                                                               >         ldd -136(%r30),%r5
                                                               >         ldd -128(%r30),%r4
         bve (%r2)                                                       bve (%r2)
         ldd,mb -144(%r30),%r7                                 |         ldd,mb -160(%r30),%r8
         .EXIT                                                           .EXIT
         .PROCEND                                                        .PROCEND
         .size   send_group_sig_info, .-send_group_sig_info              .size   send_group_sig_info, .-send_group_sig_info

And here are a lot of questions:
	Q2: some diff in routine like 2 times ldcw with gcc-3 and only one with gcc-4 (may be ok?)

	Q3: a 64bit comparison (cmpb,*<> %r0,%r28,.L944) introduced with gcc-4?

	Q4: imho the most important: with 2 gcc where is the couple of ssm/rsm that jejb introduced in his patch:

<http://lists.parisc-linux.org/pipermail/parisc-linux-cvs/2005-October/036211.html>
--- linux-2.6/include/asm-parisc/spinlock.h	2005/09/14 14:42:11	1.16
+++ linux-2.6/include/asm-parisc/spinlock.h	2005/10/20 16:07:04	1.17
@@ -11,18 +11,25 @@
  	return *a == 0;
  }

-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+#define __raw_spin_lock(lock) __raw_spin_lock_flags(lock, 0)
  #define __raw_spin_unlock_wait(x) \
  		do { cpu_relax(); } while (__raw_spin_is_locked(x))

-static inline void __raw_spin_lock(raw_spinlock_t *x)
+static inline void __raw_spin_lock_flags(raw_spinlock_t *x,
+					 unsigned long flags)
  {
  	volatile unsigned int *a;

  	mb();
  	a = __ldcw_align(x);
  	while (__ldcw(a) == 0)
-		while (*a == 0);
+		while (*a == 0)
+			if (flags & PSW_SM_I) {
+				local_irq_enable();
+				cpu_relax();
+				local_irq_disable();
+			} else
+				cpu_relax();
  	mb();
  }

?

Mike, feel free to complete this summary if something looks like bad, not complete or not clear ;-)

Thanks for help,
	Joel



More information about the parisc-linux mailing list