[parisc-linux] [PATCH] Update HPPA LinuxThread implementation and remove old .dpatch files

Carlos O'Donell carlos@baldric.uwo.ca
Sun, 2 Mar 2003 11:48:16 -0500


--7iMSBzlTiPOCCT2k
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline


debian-glibc,

With regard to glibc 2.3.x:

The following is an update of LinuxThreads for HPPA. The changelog is
still in progress, but the patch has shown no regressions in testing;
it actually does better in the gcc/g++ testsuite. This work represents
some long discussions between John David Anglin and myself, so please
read each dpatch header for more information about a specific patch.

Please remove the following .dpatch files from debian-glibc's CVS:

glibc23-02-hppa-min-kern-unwind-fde.dpatch
glibc23-03-hppa-mcontext.dpatch
glibc23-04-hppa-fcntl64.dpatch
glibc23-05-hppa-buildhack.dpatch
glibc23-06-hppa-tests.dpatch
glibc23-08-hppa-configure.dpatch

They are no longer being used and have been superseded by CVS patches.

Please replace glibc23-00-hppa-pthreads.dpatch with the attached version.
Please add glibc23-hppa-malloc8.dpatch to the list of HPPA patches.

Quick Summary:
- LinuxThreads is now using a self-aligning lock (see the sketch
  below).
- Malloc alignment has been moved back to 8 bytes for optimal
  performance.
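
For the curious, the self-aligning trick works roughly like this (a
minimal sketch of the idea only, with a hypothetical helper name; see
the real __ldcw_align macro in the attached dpatch): the lock type is
a 16-byte struct of four ints, and the locking code rounds the
struct's address up to the next 16-byte boundary, which is guaranteed
to land on one of the four ints no matter how the struct itself ends
up aligned:

  /* Sketch: find the 16-byte-aligned word inside a 16-byte object
     that may itself only be 4- or 8-byte aligned.  */
  typedef volatile struct {
    int lock[4];                     /* 16 bytes total */
  } __atomic_lock_t;

  #define __PA_LDCW_ALIGNMENT 16

  static inline unsigned int *
  align_for_ldcw (__atomic_lock_t *l) /* hypothetical name */
  {
    unsigned long a = (unsigned long) l;
    /* Round up; the result always stays within the four-int array,
       since the struct spans a full 16 bytes.  */
    a = (a + __PA_LDCW_ALIGNMENT - 1)
        & ~(unsigned long) (__PA_LDCW_ALIGNMENT - 1);
    return (unsigned int *) a;
  }

ldcw then uses that one aligned word as the semaphore; the other
three ints are just padding.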

I did this work 2-3 weeks ago, but due to current time constraints it
sat behind "libgcc-compat hppa" in the queue, and thus didn't get out.
Randolph Chung recently fixed hppa's __clz_tab libgcc-compat issue,
which means I can take the 5 minutes I need to send this email :)

This code has been heavily tested by John and myself, but should any
HPPA users find problems, or have comments, please file bugs and/or
contact me directly or via any of the lists :)

Happy hacking.

Cheers,
Carlos.


--7iMSBzlTiPOCCT2k
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="glibc23-00-hppa-pthreads.dpatch"

#! /bin/sh -e

# DP: Description: HP/PARISC LinuxThreads implementation "__ldcw_align()"
# DP: Author: Carlos O'Donell <carlos@baldric.uwo.ca>
# DP: Upstream status: Not submitted
# DP: Status Details: Writing changelogs. 
# DP: Date: March 1st, 2003

# This patch represents some of the work that John David Anglin and I
# have done to try to get LinuxThreads passing all the gcc/g++/glibc
# testsuites.  The biggest issue we are trying to solve is the required
# 16-byte alignment for all locks.  Since gcc cannot provide this type
# of alignment in all scenarios, John devised an interesting
# self-aligning solution.  The implementation of that solution is
# presented here.
#	- Carlos.
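#
# For reference, ldcw ("load and clear word") atomically loads the
# 16-byte-aligned word and writes zero to it, so on PA-RISC a zero
# word means "lock held" and a free lock must be initialized to a
# non-zero value.  The resulting spin loop is essentially (a sketch
# mirroring the new __pthread_spin_lock in the patch below):
#
#   unsigned int *a = __ldcw_align (lock); /* aligned semaphore word */
#   while (__ldcw (a) == 0)                /* 0 => lock already held */
#     while (*a == 0)                      /* spin with plain loads  */
#       ;
#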

if [ $# -ne 2 ]; then
    echo >&2 "`basename $0`: script expects -patch|-unpatch as argument"
    exit 1
fi
case "$1" in
    -patch) patch -d "$2" -f --no-backup-if-mismatch -p1 < $0;;
    -unpatch) patch -d "$2" -f --no-backup-if-mismatch -R -p1 < $0;;
    *)
	echo >&2 "`basename $0`: script expects -patch|-unpatch as argument"
	exit 1
esac
exit 0

# append the patch here and adjust the -p? flag in the patch calls.
diff -urN glibc-2.3.1.orig/linuxthreads/descr.h glibc-2.3.1/linuxthreads/descr.h
--- glibc-2.3.1.orig/linuxthreads/descr.h	2003-01-15 12:58:11.000000000 -0500
+++ glibc-2.3.1/linuxthreads/descr.h	2003-01-15 18:24:36.000000000 -0500
@@ -70,7 +70,7 @@
 /* Atomic counter made possible by compare_and_swap */
 struct pthread_atomic {
   long p_count;
-  int p_spinlock;
+  __atomic_lock_t p_spinlock;
 };
 
 
diff -urN glibc-2.3.1.orig/linuxthreads/pt-machine.c glibc-2.3.1/linuxthreads/pt-machine.c
--- glibc-2.3.1.orig/linuxthreads/pt-machine.c	2002-08-26 18:39:45.000000000 -0400
+++ glibc-2.3.1/linuxthreads/pt-machine.c	2003-01-15 18:24:36.000000000 -0500
@@ -19,7 +19,9 @@
 
 #define PT_EI
 
-extern long int testandset (int *spinlock);
+#include <pthread.h>
+
+extern long int testandset (__atomic_lock_t *spinlock);
 extern int __compare_and_swap (long int *p, long int oldval, long int newval);
 
 #include <pt-machine.h>
diff -urN glibc-2.3.1.orig/linuxthreads/pthread.c glibc-2.3.1/linuxthreads/pthread.c
--- glibc-2.3.1.orig/linuxthreads/pthread.c	2003-01-15 12:58:15.000000000 -0500
+++ glibc-2.3.1/linuxthreads/pthread.c	2003-01-15 18:24:36.000000000 -0500
@@ -296,9 +296,9 @@
   pthread_descr self;
 
   /* First of all init __pthread_handles[0] and [1] if needed.  */
-# if __LT_SPINLOCK_INIT != 0
-  __pthread_handles[0].h_lock = __LOCK_INITIALIZER;
-  __pthread_handles[1].h_lock = __LOCK_INITIALIZER;
+# ifdef __LT_INITIALIZER_NOT_ZERO
+  __pthread_handles[0].h_lock = __LOCK_ALT_INITIALIZER;
+  __pthread_handles[1].h_lock = __LOCK_ALT_INITIALIZER;
 # endif
 # ifndef SHARED
   /* Unlike in the dynamically linked case the dynamic linker has not
@@ -366,7 +366,7 @@
 # endif
   /* self->p_start_args need not be initialized, it's all zero.  */
   self->p_userstack = 1;
-# if __LT_SPINLOCK_INIT != 0
+# ifdef __LT_INITIALIZER_NOT_ZERO 
   self->p_resume_count = (struct pthread_atomic) __ATOMIC_INITIALIZER;
 # endif
   self->p_alloca_cutoff = __MAX_ALLOCA_CUTOFF;
@@ -380,9 +380,9 @@
 #else  /* USE_TLS */
 
   /* First of all init __pthread_handles[0] and [1].  */
-# if __LT_SPINLOCK_INIT != 0
-  __pthread_handles[0].h_lock = __LOCK_INITIALIZER;
-  __pthread_handles[1].h_lock = __LOCK_INITIALIZER;
+# ifdef __LT_INITIALIZER_NOT_ZERO
+  __pthread_handles[0].h_lock = __LOCK_ALT_INITIALIZER;
+  __pthread_handles[1].h_lock = __LOCK_ALT_INITIALIZER;
 # endif
   __pthread_handles[0].h_descr = &__pthread_initial_thread;
   __pthread_handles[1].h_descr = &__pthread_manager_thread;
diff -urN glibc-2.3.1.orig/linuxthreads/spinlock.c glibc-2.3.1/linuxthreads/spinlock.c
--- glibc-2.3.1.orig/linuxthreads/spinlock.c	2002-08-29 06:32:19.000000000 -0400
+++ glibc-2.3.1/linuxthreads/spinlock.c	2003-01-15 18:24:36.000000000 -0500
@@ -24,9 +24,9 @@
 #include "spinlock.h"
 #include "restart.h"
 
-static void __pthread_acquire(int * spinlock);
+static void __pthread_acquire(__atomic_lock_t * spinlock);
 
-static inline void __pthread_release(int * spinlock)
+static inline void __pthread_release(__atomic_lock_t * spinlock)
 {
   WRITE_MEMORY_BARRIER();
   *spinlock = __LT_SPINLOCK_INIT;
@@ -269,11 +269,11 @@
 struct wait_node {
   struct wait_node *next;	/* Next node in null terminated linked list */
   pthread_descr thr;		/* The thread waiting with this node */
-  int abandoned;		/* Atomic flag */
+  __atomic_lock_t abandoned;	/* Atomic flag */
 };
 
 static long wait_node_free_list;
-static int wait_node_free_list_spinlock;
+__pthread_lock_define_initialized(static, wait_node_free_list_spinlock);
 
 /* Allocate a new node from the head of the free list using an atomic
    operation, or else using malloc if that list is empty.  A fundamental
@@ -376,7 +376,7 @@
       if (self == NULL)
 	self = thread_self();
 
-      wait_node.abandoned = 0;
+      wait_node.abandoned = __LT_SPINLOCK_INIT;
       wait_node.next = (struct wait_node *) lock->__status;
       wait_node.thr = self;
       lock->__status = (long) &wait_node;
@@ -402,7 +402,7 @@
       wait_node.thr = self;
       newstatus = (long) &wait_node;
     }
-    wait_node.abandoned = 0;
+    wait_node.abandoned = __LT_SPINLOCK_INIT;
     wait_node.next = (struct wait_node *) oldstatus;
     /* Make sure the store in wait_node.next completes before performing
        the compare-and-swap */
@@ -451,7 +451,7 @@
       if (self == NULL)
 	self = thread_self();
 
-      p_wait_node->abandoned = 0;
+      p_wait_node->abandoned = __LT_SPINLOCK_INIT;
       p_wait_node->next = (struct wait_node *) lock->__status;
       p_wait_node->thr = self;
       lock->__status = (long) p_wait_node;
@@ -474,7 +474,7 @@
       p_wait_node->thr = self;
       newstatus = (long) p_wait_node;
     }
-    p_wait_node->abandoned = 0;
+    p_wait_node->abandoned = __LT_SPINLOCK_INIT;
     p_wait_node->next = (struct wait_node *) oldstatus;
     /* Make sure the store in wait_node.next completes before performing
        the compare-and-swap */
@@ -574,7 +574,7 @@
     while (p_node != (struct wait_node *) 1) {
       int prio;
 
-      if (p_node->abandoned) {
+      if (lock_held(&p_node->abandoned)) {
 	/* Remove abandoned node. */
 #if defined TEST_FOR_COMPARE_AND_SWAP
 	if (!__pthread_has_cas)
@@ -605,7 +605,7 @@
 	p_max_prio = p_node;
       }
 
-      /* This canno6 jump backward in the list, so no further read
+      /* This cannot jump backward in the list, so no further read
          barrier is needed. */
       pp_node = &p_node->next;
       p_node = *pp_node;
@@ -662,7 +662,7 @@
 #if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
 
 int __pthread_compare_and_swap(long * ptr, long oldval, long newval,
-                               int * spinlock)
+                               __atomic_lock_t * spinlock)
 {
   int res;
 
@@ -699,7 +699,7 @@
    - When nanosleep() returns, we try again, doing MAX_SPIN_COUNT
      sched_yield(), then sleeping again if needed. */
 
-static void __pthread_acquire(int * spinlock)
+static void __pthread_acquire(__atomic_lock_t * spinlock)	
 {
   int cnt = 0;
   struct timespec tm;
diff -urN glibc-2.3.1.orig/linuxthreads/spinlock.h glibc-2.3.1/linuxthreads/spinlock.h
--- glibc-2.3.1.orig/linuxthreads/spinlock.h	2001-05-24 19:36:35.000000000 -0400
+++ glibc-2.3.1/linuxthreads/spinlock.h	2003-01-15 18:24:36.000000000 -0500
@@ -33,14 +33,28 @@
 #endif
 #endif
 
+/* Define lock_held for all arches that don't need a modified copy. */
+#ifndef __LT_INITIALIZER_NOT_ZERO
+# define lock_held(p) *(p)
+#endif
+
+/* Initializers for possibly complex structures */
+#ifdef __LT_INITIALIZER_NOT_ZERO
+# define __pthread_lock_define_initialized(CLASS,NAME) \
+	CLASS __atomic_lock_t NAME = __LT_SPINLOCK_ALT_INIT
+#else
+# define __pthread_lock_define_initialized(CLASS,NAME) \
+	CLASS __atomic_lock_t NAME
+#endif
+
 #if defined(TEST_FOR_COMPARE_AND_SWAP)
 
 extern int __pthread_has_cas;
 extern int __pthread_compare_and_swap(long * ptr, long oldval, long newval,
-                                      int * spinlock);
+                                      __atomic_lock_t * spinlock);
 
 static inline int compare_and_swap(long * ptr, long oldval, long newval,
-                                   int * spinlock)
+                                   __atomic_lock_t * spinlock)
 {
   if (__builtin_expect (__pthread_has_cas, 1))
     return __compare_and_swap(ptr, oldval, newval);
@@ -58,7 +72,7 @@
 
 static inline int
 compare_and_swap_with_release_semantics (long * ptr, long oldval,
-					 long newval, int * spinlock)
+					 long newval, __atomic_lock_t * spinlock)
 {
   return __compare_and_swap_with_release_semantics (ptr, oldval,
 						    newval);
@@ -67,7 +81,7 @@
 #endif
 
 static inline int compare_and_swap(long * ptr, long oldval, long newval,
-                                   int * spinlock)
+                                   __atomic_lock_t * spinlock)
 {
   return __compare_and_swap(ptr, oldval, newval);
 }
@@ -75,10 +89,10 @@
 #else
 
 extern int __pthread_compare_and_swap(long * ptr, long oldval, long newval,
-                                      int * spinlock);
+                                      __atomic_lock_t * spinlock);
 
 static inline int compare_and_swap(long * ptr, long oldval, long newval,
-                                   int * spinlock)
+                                   __atomic_lock_t * spinlock)
 {
   return __pthread_compare_and_swap(ptr, oldval, newval, spinlock);
 }
diff -urN glibc-2.3.1.orig/linuxthreads/sysdeps/hppa/pspinlock.c glibc-2.3.1/linuxthreads/sysdeps/hppa/pspinlock.c
--- glibc-2.3.1.orig/linuxthreads/sysdeps/hppa/pspinlock.c	2002-08-26 18:39:51.000000000 -0400
+++ glibc-2.3.1/linuxthreads/sysdeps/hppa/pspinlock.c	2003-01-15 18:26:51.000000000 -0500
@@ -24,15 +24,12 @@
 int
 __pthread_spin_lock (pthread_spinlock_t *lock)
 {
-  unsigned int val;
+	unsigned int *addr = __ldcw_align (lock);
+	
+	while (__ldcw (addr) == 0)
+		while (*addr == 0) ;
 
-  do
-    asm volatile ("ldcw %1,%0"
-		  : "=r" (val), "=m" (*lock)
-		  : "m" (*lock));
-  while (!val);
-
-  return 0;
+	return 0;
 }
 weak_alias (__pthread_spin_lock, pthread_spin_lock)
 
@@ -40,13 +37,9 @@
 int
 __pthread_spin_trylock (pthread_spinlock_t *lock)
 {
-  unsigned int val;
-
-  asm volatile ("ldcw %1,%0"
-		: "=r" (val), "=m" (*lock)
-		: "m" (*lock));
+	unsigned int *a = __ldcw_align (lock);
 
-  return val ? 0 : EBUSY;
+	return __ldcw (a) ? 0 : EBUSY;
 }
 weak_alias (__pthread_spin_trylock, pthread_spin_trylock)
 
@@ -54,7 +47,9 @@
 int
 __pthread_spin_unlock (pthread_spinlock_t *lock)
 {
-  *lock = 1;
+  unsigned int *a = __ldcw_align (lock);
+
+  *a = 1;
   return 0;
 }
 weak_alias (__pthread_spin_unlock, pthread_spin_unlock)
@@ -66,7 +61,9 @@
   /* We can ignore the `pshared' parameter.  Since we are busy-waiting
      all processes which can access the memory location `lock' points
      to can use the spinlock.  */
-  *lock = 1;
+  unsigned int *a = __ldcw_align (lock);
+
+  *a = 1;
   return 0;
 }
 weak_alias (__pthread_spin_init, pthread_spin_init)
diff -urN glibc-2.3.1.orig/linuxthreads/sysdeps/hppa/pt-machine.h glibc-2.3.1/linuxthreads/sysdeps/hppa/pt-machine.h
--- glibc-2.3.1.orig/linuxthreads/sysdeps/hppa/pt-machine.h	2002-08-26 18:39:51.000000000 -0400
+++ glibc-2.3.1/linuxthreads/sysdeps/hppa/pt-machine.h	2003-01-15 18:24:49.000000000 -0500
@@ -22,13 +22,13 @@
 #ifndef _PT_MACHINE_H
 #define _PT_MACHINE_H   1
 
+#include <sys/types.h>
 #include <bits/initspin.h>
 
 #ifndef PT_EI
 # define PT_EI extern inline
 #endif
 
-extern long int testandset (int *spinlock);
 extern int __compare_and_swap (long int *p, long int oldval, long int newval);
 
 /* Get some notion of the current stack.  Need not be exactly the top
@@ -36,27 +36,80 @@
 #define CURRENT_STACK_FRAME  stack_pointer
 register char * stack_pointer __asm__ ("%r30");
 
+/* Get/Set thread-specific pointer.  We have to call into the kernel to
+ * modify it, but we can read it in user mode.  */
+
+#define THREAD_SELF __get_cr27()
+
+static inline struct _pthread_descr_struct * __get_cr27(void)
+{
+	long cr27;
+	asm("mfctl %%cr27, %0" : "=r" (cr27) : );
+	return (struct _pthread_descr_struct *) cr27;
+}
+
+#define INIT_THREAD_SELF(descr, nr) __set_cr27(descr)
+
+static inline void __set_cr27(struct _pthread_descr_struct * cr27)
+{
+	asm(
+		"ble	0xe0(%%sr2, %%r0)\n\t"
+		"copy	%0, %%r26"
+	 : : "r" (cr27) : "r26" );
+}
+
+/* We want the OS to assign stack addresses.  */
+#define FLOATING_STACKS	1
+#define ARCH_STACK_MAX_SIZE	8*1024*1024
 
 /* The hppa only has one atomic read and modify memory operation,
    load and clear, so hppa spinlocks must use zero to signify that
-   someone is holding the lock.  */
+   someone is holding the lock.  The address used for the ldcw
+   semaphore must be 16-byte aligned.  */
+#define __ldcw(a) ({ \
+  unsigned int __ret;							\
+  __asm__ __volatile__("ldcw 0(%2),%0"					\
+                      : "=r" (__ret), "=m" (*(a)) : "r" (a));		\
+  __ret;								\
+})
+
+/* Because malloc only guarantees 8-byte alignment for malloc'd data,
+   and GCC only guarantees 8-byte alignment for stack locals, we can't
+   be assured of 16-byte alignment for atomic lock data even if we
+   specify "__attribute ((aligned(16)))" in the type declaration.  So,
+   we use a struct containing an array of four ints for the atomic lock
+   type and dynamically select the 16-byte aligned int from the array
+   for the semaphore.  */
+#define __PA_LDCW_ALIGNMENT 16
+#define __ldcw_align(a) ({ \
+  unsigned int __ret = (unsigned int) a;				\
+  if ((__ret & ~(__PA_LDCW_ALIGNMENT - 1)) < (unsigned int) a)		\
+    __ret = (__ret & ~(__PA_LDCW_ALIGNMENT - 1)) + __PA_LDCW_ALIGNMENT; \
+  (unsigned int *) __ret;						\
+})
 
-#define xstr(s) str(s)
-#define str(s) #s
 /* Spinlock implementation; required.  */
-PT_EI long int
-testandset (int *spinlock)
+PT_EI int
+__load_and_clear (__atomic_lock_t *spinlock)
 {
-  int ret;
+  unsigned int *a = __ldcw_align (spinlock);
 
-  __asm__ __volatile__(
-       "ldcw 0(%2),%0"
-       : "=r"(ret), "=m"(*spinlock)
-       : "r"(spinlock));
+  return __ldcw (a);
+}
 
-  return ret == 0;
+/* Emulate testandset */
+PT_EI long int
+testandset (__atomic_lock_t *spinlock)
+{
+  return (__load_and_clear(spinlock) == 0);
 }
-#undef str
-#undef xstr
 
+PT_EI int
+lock_held (__atomic_lock_t *spinlock)
+{
+  unsigned int *a = __ldcw_align (spinlock);
+
+  return *a == 0;
+}
+		
 #endif /* pt-machine.h */
diff -urN glibc-2.3.1.orig/linuxthreads/sysdeps/pthread/bits/libc-lock.h glibc-2.3.1/linuxthreads/sysdeps/pthread/bits/libc-lock.h
--- glibc-2.3.1.orig/linuxthreads/sysdeps/pthread/bits/libc-lock.h	2003-01-15 12:58:35.000000000 -0500
+++ glibc-2.3.1/linuxthreads/sysdeps/pthread/bits/libc-lock.h	2003-01-15 18:24:36.000000000 -0500
@@ -71,12 +71,12 @@
    initialized locks must be set to one due to the lack of normal
    atomic operations.) */
 
-#if __LT_SPINLOCK_INIT == 0
+#ifdef __LT_INITIALIZER_NOT_ZERO
 #  define __libc_lock_define_initialized(CLASS,NAME) \
-  CLASS __libc_lock_t NAME;
+  CLASS __libc_lock_t NAME = PTHREAD_MUTEX_INITIALIZER;
 #else
 #  define __libc_lock_define_initialized(CLASS,NAME) \
-  CLASS __libc_lock_t NAME = PTHREAD_MUTEX_INITIALIZER;
+  CLASS __libc_lock_t NAME;
 #endif
 
 #define __libc_rwlock_define_initialized(CLASS,NAME) \
diff -urN glibc-2.3.1.orig/linuxthreads/sysdeps/pthread/bits/pthreadtypes.h glibc-2.3.1/linuxthreads/sysdeps/pthread/bits/pthreadtypes.h
--- glibc-2.3.1.orig/linuxthreads/sysdeps/pthread/bits/pthreadtypes.h	2003-01-15 12:58:35.000000000 -0500
+++ glibc-2.3.1/linuxthreads/sysdeps/pthread/bits/pthreadtypes.h	2003-01-15 18:24:36.000000000 -0500
@@ -22,12 +22,14 @@
 #define __need_schedparam
 #include <bits/sched.h>
 
+typedef int __atomic_lock_t;
+
 /* Fast locks (not abstract because mutexes and conditions aren't abstract). */
 struct _pthread_fastlock
 {
-  long int __status;   /* "Free" or "taken" or head of waiting list */
-  int __spinlock;      /* Used by compare_and_swap emulation. Also,
-			  adaptive SMP lock stores spin count here. */
+  long int __status;		/* "Free" or "taken" or head of waiting list */
+  __atomic_lock_t __spinlock;	/* Used by compare_and_swap emulation. Also,
+				   adaptive SMP lock stores spin count here. */
 };
 
 #ifndef _PTHREAD_DESCR_DEFINED
diff -urN glibc-2.3.1.orig/linuxthreads/sysdeps/unix/sysv/linux/hppa/bits/initspin.h glibc-2.3.1/linuxthreads/sysdeps/unix/sysv/linux/hppa/bits/initspin.h
--- glibc-2.3.1.orig/linuxthreads/sysdeps/unix/sysv/linux/hppa/bits/initspin.h	2002-08-26 18:39:55.000000000 -0400
+++ glibc-2.3.1/linuxthreads/sysdeps/unix/sysv/linux/hppa/bits/initspin.h	2003-01-15 18:24:49.000000000 -0500
@@ -17,11 +17,23 @@
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
+/* Initialize global spinlocks without cast, generally macro wrapped */
+#define __LT_SPINLOCK_ALT_INIT { { 1, 1, 1, 1, } }
+
 /* Initial value of a spinlock.  PA-RISC only implements atomic load
    and clear so this must be non-zero. */
-#define __LT_SPINLOCK_INIT 1
+#define __LT_SPINLOCK_INIT ((__atomic_lock_t) __LT_SPINLOCK_ALT_INIT)
+
+/* Macros for lock initializers, not using the above definition.
+   The above definition is not used in the case that static initializers
+   use this value. */
+#define __LOCK_INITIALIZER { __LT_SPINLOCK_ALT_INIT, 0 }
+#define __ATOMIC_INITIALIZER { 0, __LT_SPINLOCK_ALT_INIT }
+
+/* Used to initialize _pthread_fastlock's in non-static case */
+#define __LOCK_ALT_INITIALIZER ((struct _pthread_fastlock){ __LT_SPINLOCK_INIT, 0 })
+
+/* Tell the rest of the code that the initializer is non-zero without
+   explaining its internal structure */
+#define __LT_INITIALIZER_NOT_ZERO
 
-/* Macros for lock initializers, using the above definition. */
-#define __LOCK_INITIALIZER { 0, __LT_SPINLOCK_INIT }
-#define __ALT_LOCK_INITIALIZER { 0, __LT_SPINLOCK_INIT }
-#define __ATOMIC_INITIALIZER { 0, __LT_SPINLOCK_INIT }
diff -urN glibc-2.3.1.orig/linuxthreads/sysdeps/unix/sysv/linux/hppa/bits/pthreadtypes.h glibc-2.3.1/linuxthreads/sysdeps/unix/sysv/linux/hppa/bits/pthreadtypes.h
--- glibc-2.3.1.orig/linuxthreads/sysdeps/unix/sysv/linux/hppa/bits/pthreadtypes.h	1969-12-31 19:00:00.000000000 -0500
+++ glibc-2.3.1/linuxthreads/sysdeps/unix/sysv/linux/hppa/bits/pthreadtypes.h	2003-01-15 18:24:49.000000000 -0500
@@ -0,0 +1,150 @@
+/* Linuxthreads - a simple clone()-based implementation of Posix        */
+/* threads for Linux.                                                   */
+/* Copyright (C) 1996 Xavier Leroy (Xavier.Leroy@inria.fr)              */
+/*                                                                      */
+/* This program is free software; you can redistribute it and/or        */
+/* modify it under the terms of the GNU Library General Public License  */
+/* as published by the Free Software Foundation; either version 2       */
+/* of the License, or (at your option) any later version.               */
+/*                                                                      */
+/* This program is distributed in the hope that it will be useful,      */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
+/* GNU Library General Public License for more details.                 */
+
+#if !defined _BITS_TYPES_H && !defined _PTHREAD_H
+# error "Never include <bits/pthreadtypes.h> directly; use <sys/types.h> instead."
+#endif
+
+#ifndef _BITS_PTHREADTYPES_H
+#define _BITS_PTHREADTYPES_H	1
+
+#define __need_schedparam
+#include <bits/sched.h>
+
+/* We need 128-bit alignment for the ldcw semaphore.  At most, we are
+   assured of 64-bit alignment for stack locals and malloc'd data.  Thus,
+   we use a struct with four ints for the atomic lock type.  The locking
+   code will figure out which of the four to use for the ldcw semaphore.  */
+typedef volatile struct {
+  int lock[4];
+} __attribute__ ((aligned(16))) __atomic_lock_t;
+
+/* Fast locks (not abstract because mutexes and conditions aren't abstract). */
+struct _pthread_fastlock
+{
+  __atomic_lock_t __spinlock;	/* Used by compare_and_swap emulation.  Also,
+				   adaptive SMP lock stores spin count here. */
+  long int __status;		/* "Free" or "taken" or head of waiting list */
+};
+
+#ifndef _PTHREAD_DESCR_DEFINED
+/* Thread descriptors */
+typedef struct _pthread_descr_struct *_pthread_descr;
+# define _PTHREAD_DESCR_DEFINED
+#endif
+
+
+/* Attributes for threads.  */
+typedef struct __pthread_attr_s
+{
+  int __detachstate;
+  int __schedpolicy;
+  struct __sched_param __schedparam;
+  int __inheritsched;
+  int __scope;
+  size_t __guardsize;
+  int __stackaddr_set;
+  void *__stackaddr;
+  size_t __stacksize;
+} pthread_attr_t;
+
+
+/* Conditions (not abstract because of PTHREAD_COND_INITIALIZER) */
+typedef struct
+{
+  struct _pthread_fastlock __c_lock; /* Protect against concurrent access */
+  _pthread_descr __c_waiting;        /* Threads waiting on this condition */
+} pthread_cond_t;
+
+
+/* Attribute for condition variables.  */
+typedef struct
+{
+  int __dummy;
+} pthread_condattr_t;
+
+/* Keys for thread-specific data */
+typedef unsigned int pthread_key_t;
+
+
+/* Mutexes (not abstract because of PTHREAD_MUTEX_INITIALIZER).  */
+/* (The layout is unnatural to maintain binary compatibility
+    with earlier releases of LinuxThreads.) */
+typedef struct
+{
+  int __m_reserved;               /* Reserved for future use */
+  int __m_count;                  /* Depth of recursive locking */
+  _pthread_descr __m_owner;       /* Owner thread (if recursive or errcheck) */
+  int __m_kind;                   /* Mutex kind: fast, recursive or errcheck */
+  struct _pthread_fastlock __m_lock; /* Underlying fast lock */
+} pthread_mutex_t;
+
+
+/* Attribute for mutex.  */
+typedef struct
+{
+  int __mutexkind;
+} pthread_mutexattr_t;
+
+
+/* Once-only execution */
+typedef int pthread_once_t;
+
+
+#ifdef __USE_UNIX98
+/* Read-write locks.  */
+typedef struct _pthread_rwlock_t
+{
+  struct _pthread_fastlock __rw_lock; /* Lock to guarantee mutual exclusion */
+  int __rw_readers;                   /* Number of readers */
+  _pthread_descr __rw_writer;         /* Identity of writer, or NULL if none */
+  _pthread_descr __rw_read_waiting;   /* Threads waiting for reading */
+  _pthread_descr __rw_write_waiting;  /* Threads waiting for writing */
+  int __rw_kind;                      /* Reader/Writer preference selection */
+  int __rw_pshared;                   /* Shared between processes or not */
+} pthread_rwlock_t;
+
+
+/* Attribute for read-write locks.  */
+typedef struct
+{
+  int __lockkind;
+  int __pshared;
+} pthread_rwlockattr_t;
+#endif
+
+#ifdef __USE_XOPEN2K
+/* POSIX spinlock data type.  */
+typedef __atomic_lock_t pthread_spinlock_t;
+
+/* POSIX barrier. */
+typedef struct {
+  struct _pthread_fastlock __ba_lock; /* Lock to guarantee mutual exclusion */
+  int __ba_required;                  /* Threads needed for completion */
+  int __ba_present;                   /* Threads waiting */
+  _pthread_descr __ba_waiting;        /* Queue of waiting threads */
+} pthread_barrier_t;
+
+/* barrier attribute */
+typedef struct {
+  int __pshared;
+} pthread_barrierattr_t;
+
+#endif
+
+
+/* Thread identifiers */
+typedef unsigned long int pthread_t;
+
+#endif	/* bits/pthreadtypes.h */
diff -urN glibc-2.3.1.orig/sysdeps/hppa/dl-fptr.c glibc-2.3.1/sysdeps/hppa/dl-fptr.c
--- glibc-2.3.1.orig/sysdeps/hppa/dl-fptr.c	2002-01-31 20:31:51.000000000 -0500
+++ glibc-2.3.1/sysdeps/hppa/dl-fptr.c	2003-01-15 18:24:36.000000000 -0500
@@ -30,7 +30,7 @@
 # include <pt-machine.h>
 
 /* Remember, we use 0 to mean that a lock is taken on PA-RISC. */
-static int __hppa_fptr_lock = 1;
+static __atomic_lock_t __hppa_fptr_lock = __LT_SPINLOCK_ALT_INIT;
 #endif
 
 /* Because ld.so is now versioned, these functions can be in their own
@@ -127,7 +127,7 @@
 #ifdef _LIBC_REENTRANT
   /* Release the lock.  Again, remember, zero means the lock is taken!  */
   if (mem == NULL)
-    __hppa_fptr_lock = 1;
+    __hppa_fptr_lock = __LT_SPINLOCK_INIT;
 #endif
 
   /* Set bit 30 to indicate to $$dyncall that this is a PLABEL. */
@@ -180,7 +180,7 @@
 
 #ifdef _LIBC_REENTRANT
   /* Release the lock. */
-  __hppa_fptr_lock = 1;
+  __hppa_fptr_lock = __LT_SPINLOCK_INIT;
 #endif
 }
 
@@ -190,6 +190,8 @@
   Elf32_Addr addr = (Elf32_Addr) address;
   struct hppa_fptr *f;
 
+  address = (void *)((unsigned long)address &~ 3); /* Clear the bottom two bits.  See make_fptr. */
+  
 #ifdef _LIBC_REENTRANT
   /* Make sure we are alone.  */
   while (testandset (&__hppa_fptr_lock));
@@ -204,7 +206,7 @@
 
 #ifdef _LIBC_REENTRANT
   /* Release the lock.   */
-  __hppa_fptr_lock = 1;
+  __hppa_fptr_lock = __LT_SPINLOCK_INIT;
 #endif
 
   return addr;

--7iMSBzlTiPOCCT2k
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="glibc23-hppa-malloc8.dpatch"

#! /bin/sh -e

# DP: Description: Return malloc to 8-byte alignment.
# DP: Author: Carlos O'Donell <carlos@baldric.uwo.ca>
# DP: Upstream status: Not submitted
# DP: Status Details: Writing changelog; will be submitted in the next week or so.
# DP: Date: March 1st, 2003

# Since GCC can't assure 16-byte alignment in all situations, we have
# moved to the much more robust self-aligning setup, where an alignment
# of 8 bytes is perfectly okay.  This patch reverts our malloc alignment
# to 8 bytes and gives us a more optimal situation from the viewpoint of
# the malloc algorithm.
#
#	- Carlos.
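#
# A quick sanity check of the generic default (assuming a 32-bit hppa
# target; values from the stock malloc, not from this patch):
#
#   SIZE_SZ          = sizeof (size_t)  = 4
#   MALLOC_ALIGNMENT = 2 * SIZE_SZ      = 8
#
# so dropping the -DMALLOC_ALIGNMENT=16 override below simply returns
# malloc to its generic 8-byte default rather than picking a new value.
#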

if [ $# -ne 2 ]; then
    echo >&2 "`basename $0`: script expects -patch|-unpatch as argument"
    exit 1
fi
case "$1" in
    -patch) patch -d "$2" -f --no-backup-if-mismatch -p1 < $0;;
    -unpatch) patch -d "$2" -f --no-backup-if-mismatch -R -p1 < $0;;
    *)
	echo >&2 "`basename $0`: script expects -patch|-unpatch as argument"
	exit 1
esac
exit 0

# append the patch here and adjust the -p? flag in the patch calls.
--- glibc-2.3.1/sysdeps/hppa/Makefile	2003-01-20 23:47:12.000000000 -0500
+++ glibc-2.3.1/sysdeps/hppa/Makefile	2003-01-20 23:47:19.000000000 -0500
@@ -22,10 +22,6 @@
 # CFLAGS-.os += -ffunction-sections
 LDFLAGS-c_pic.os += -Wl,--unique=.text*
 
-ifeq ($(subdir),malloc)
-CFLAGS-malloc.c += -DMALLOC_ALIGNMENT=16
-endif
-
 ifeq ($(subdir),elf)
 CFLAGS-rtld.c += -mdisable-fpregs
 dl-routines += dl-symaddr dl-fptr

--7iMSBzlTiPOCCT2k--