Thank you for the patch.
I reviewed it and applied several changes - see https://github.com/ivmai/bdwgc/commit/757af8aa17ed107ff2915f93967087835c0100dc :
* fix version check: major >= 2 && minor >= 19 -> major > 2 || major == 2 && minor >= 19
* fix scope of workaround application - only Linux/x64 with Glibc (instead of Linux on all CPUs)
* avoid extra configure checks (currently all major targets could be compiled even without running configure&make)
Please test new code.
But still I don't know whether the proposed solution is good enough to be in master. The questions are:
* is there a link pointing to a lock elision bug implementation in Glibc? If not, it is better to register such
* this workaround simply disables lock elision (causing potentially degradation of performance). which pthread primitive causes malfunction? if trylock then we could avoid it.
Post by Paul BonePost by Paul BoneHi.
I wrote earlier about a bug I was trying to find regarding lock elision in
glibc 2.19 that affects Boehm GC. I beleive that this affects all
applications that use Boehm GC, not just Mercury, as mono applications have
also been crashing since I upgraded glibc. I would like some feedback and
https://github.com/PaulBone/bdwgc/tree/fix_tsx_bug
The patch I sent had a bug. This one fixes it and also enables the
workaround for versions greater than 2.19
From 3c22928504e6a0d306d39a60686e963bcd04ede4 Mon Sep 17 00:00:00 2001
Date: Wed, 25 Jun 2014 11:17:50 +1000
Subject: [PATCH] Workaround Linux NTPL lock elision bug.
glibc 2.19 on Linux x86-64 platforms includes support for lock elision,
by using Intel's TSX support when it is available. Without modifying an
application this converts suitable critical sections that use mutex into
transactional memory critical sections. See http://lwn.net/Articles/534758/
If a problem occurs that means that transactional memory can't be used, such
as a system call or buffer overflow, the pthreads implementation will catch
this error and retry the critical section using a normal mutex.
I noticed that since upgrading glibc that programs using Boehm GC crash, one
of these crashes was an assertion that the owner field of a mutex was
invalid. The assertion was generated by the pthreads implementation.
I believe that there is a bug in glibc that when a mutex cannot be used
safely for transactions that some series of events causes it's owner field
to be set incorrectly (or cleared when it shouldn't be).
I've found that I can work around this problem by having Boehm GC use an
error checking mutex, which I believe doesn't use lock elision and in my
testing doesn't crash.
XXX: This work-around mostly works except for linking the feature detection
in configure.ac to the conditional compilation in pthread_support.c as there
isn't an obvious way to make it work for automake and Makefile.direct.
Could I have some help updating the build system please?
    Define GC_setup_mark_lock() This procedure creates the lock specifying a
    pthread_mutexattr_t structure. This is used to disable lock elision on
    Linux with glibc 2.19 or greater.
    If we're using Linux then check for the gnu extensions required to
    identify the version of glibc at runtime.
    Call GC_setup_mark_lock() when initialising the collector.
---
 configure.ac | 13 ++++++++++
 include/private/pthread_support.h | 2 ++
 misc.c | 3 +++
 pthread_support.c | 53 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 71 insertions(+)
diff --git a/configure.ac b/configure.ac
index 7667949..6853e97 100644
--- a/configure.ac
+++ b/configure.ac
@@ -646,6 +646,19 @@ case "$host" in
  *) AC_MSG_RESULT(no) ;;
 esac
Â
+dnl Check for specific glibc functions and definitions that we need to for
+dnl the glibc 2.19 workaround.
+HAVE_LIBC_VERSION_H=no
+HAVE_GNU_GET_LIBC_VERSION=no
+case "${host}" in
+ *-linux*)
+ AC_CHECK_HEADER([gnu/libc-version.h], HAVE_LIBC_VERSION_H=yes)
+ AC_CHECK_FUNC([gnu_get_libc_version], HAVE_GNU_GET_LIBC_VERSION=yes)
+ ;;
+esac
+AC_SUBST(HAVE_LIBC_VERSION_H)
+AC_SUBST(HAVE_GNU_GET_LIBC_VERSION)
+
 dnl Include defines that have become de facto standard.
 dnl ALL_INTERIOR_POINTERS and NO_EXECUTE_PERMISSION can be overridden
 dnl in the startup code.
diff --git a/include/private/pthread_support.h b/include/private/pthread_support.h
index 525a9aa..017f194 100644
--- a/include/private/pthread_support.h
+++ b/include/private/pthread_support.h
@@ -148,6 +148,8 @@ GC_INNER_PTHRSTART GC_thread GC_start_rtn_prepare_thread(
                                         struct GC_stack_base *sb, void *arg);
 GC_INNER_PTHRSTART void GC_thread_exit_proc(void *);
Â
+GC_INNER void GC_setup_mark_lock(void);
+
 #endif /* GC_PTHREADS && !GC_WIN32_THREADS */
Â
 #endif /* GC_PTHREAD_SUPPORT_H */
diff --git a/misc.c b/misc.c
index df434a1..dccf5f3 100644
--- a/misc.c
+++ b/misc.c
@@ -875,6 +875,9 @@ GC_API void GC_CALL GC_init(void)
         /* else */ InitializeCriticalSection (&GC_allocate_ml);
      }
 # endif /* GC_WIN32_THREADS */
+# if (defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS))
+ GC_setup_mark_lock();
+# endif /* GC_PTHREADS */
 # if (defined(MSWIN32) || defined(MSWINCE)) && defined(THREADS)
       InitializeCriticalSection(&GC_write_cs);
 # endif
diff --git a/pthread_support.c b/pthread_support.c
index c00b93d..56fc94b 100644
--- a/pthread_support.c
+++ b/pthread_support.c
@@ -95,6 +95,10 @@
   typedef unsigned int sem_t;
 #endif /* GC_DGUX386_THREADS */
Â
+#ifdef HAVE_LIBC_VERSION_H
+# include <gnu/libc-version.h>
+#endif
+
 /* Undefine macros used to redirect pthread primitives. */
 # undef pthread_create
 # ifndef GC_NO_PTHREAD_SIGMASK
@@ -1973,12 +1977,61 @@ GC_INNER void GC_lock(void)
   /* defined. */
   static pthread_mutex_t mark_mutex =
         {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, {0, 0}};
+#elif defined(HAVE_GNU_GET_LIBC_VERSION) && defined(HAVE_LIBC_VERSION_H)
+ static pthread_mutex_t mark_mutex;
 #else
   static pthread_mutex_t mark_mutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
Â
 static pthread_cond_t builder_cv = PTHREAD_COND_INITIALIZER;
Â
+GC_INNER void GC_setup_mark_lock(void)
+{
+#if defined(HAVE_GNU_GET_LIBC_VERSION) && defined(HAVE_LIBC_VERSION_H)
+ pthread_mutexattr_t attr;
+ char *version_str = NULL;
+ char *strtok_save;
+ char *version_part;
+ char *version_str;
+
+ if (0 != pthread_mutexattr_init(&attr)) {
+ goto error;
+ }
+
+ /*
+ ** Check for version 2.19 or greater.
+ */
+ version_str = strdup(gnu_get_libc_version());
+ version_part = strtok_r(version_str, ".", &strtok_save);
+ if ((NULL != version_part) && (2 <= atoi(version_part))) {
+ version_part = strtok_r(NULL, ".", &strtok_save);
+ if ((NULL != version_part) && (19 <= atoi(version_part))) {
+ /*
+ * Disable lock elision on this version of glibc.
+ */
+ if (0 != pthread_mutexattr_settype(&attr,
+ PTHREAD_MUTEX_ERRORCHECK))
+ {
+ goto error;
+ }
+ }
+ }
+
+ if (0 != pthread_mutex_init(&mark_mutex, &attr)) {
+ goto error;
+ }
+ pthread_mutexattr_destroy(&attr);
+ if (NULL != version_str) {
+ free(version_str);
+ }
+ return;
+
+ perror("Error setting up marker mutex");
+ exit(1);
+#endif /* HAVE_GNU_GET_LIBC_VERSION && HAVE_LIBC_VERSION_H */
+}
+
 GC_INNER void GC_acquire_mark_lock(void)
 {
     GC_ASSERT(GC_mark_lock_holder != NUMERIC_THREAD_ID(pthread_self()));
--
2.0.0
--
Paul Bone
_______________________________________________
bdwgc mailing list
https://lists.opendylan.org/mailman/listinfo/bdwgc