[OpenAFS-devel] Another bosserver crash on Irix 6.5.20 (1.2.10-rc4)

Martin MOKREJŠ mmokrejs@natur.cuni.cz
Tue, 29 Jul 2003 16:51:00 +0200 (CEST)


On Tue, 29 Jul 2003, Martin MOKREJŠ wrote:

Hi,
  I'm sorry for more spam; I believe this is the last one. I cannot print the
contents of most of the variables, but I hope this will help someone to
find out what's wrong. ;) If anyone is interested in helping me figure out
what's going on, please email me. Thanks.

> On Tue, 29 Jul 2003, Martin MOKREJŠ wrote:
>
> [8] rxi_CleanupConnection returning 1112248 from rxi_CleanupConnection
> [7] rxi_DestroyConnection returning 1112248 from rxi_DestroyConnection
> [7] calling rxi_DestroyConnection(0x101659c0, 0x80, 0x0, 0x1a, 0x10165c10, 0x1b, 0x1006c9f0, 0x1) from function rxi_ReapConnections
> [8] calling rxi_CleanupConnection(0x101659c0, 0x3f8, 0x101659c0, 0x1006c9c8, 0x1006c9c8, 0x0, 0x1006c9f0, 0x1) from function rxi_DestroyConnection
> Process 20675 (bosserver) stopped on signal SIGBUS: Bus error (default) at [rxi_CleanupConnection:799 +0x4,0x10020d24]
>  799  if (--conn->peer->refCount <= 0) {
> (dbx) where
> >  0 rxi_CleanupConnection(0x101659c0, 0x70, 0xfb37600, 0x1c, 0x10165ac8, 0xa, 0x1, 0x1) ["/scratch2/openafs-1.2.10-rc4/src/rx/rx.c":799, 0x10020d24]
>    1 rxi_DestroyConnection(0x101659c0, 0x70, 0xfb37600, 0x1c, 0x10165ac8, 0xa, 0x1, 0x1) ["/scratch2/openafs-1.2.10-rc4/src/rx/rx.c":847, 0x10020f1c]
>    2 rxi_ReapConnections(0x0, 0x4ae3729, 0xfb37600, 0x5f370, 0x10165ac8, 0xa, 0x1, 0x1) ["/scratch2/openafs-1.2.10-rc4/src/rx/rx.c":5622, 0x10028630]
>    3 rxevent_RaiseEvents(0x100abd50, 0x70, 0xfb37600, 0x1c, 0x10165ac8, 0xa, 0x10139e10, 0x10139d00) ["/scratch2/openafs-1.2.10-rc4/src/rx/rx_event.c":390, 0x10032820]
>    4 rxi_ListenerProc(0x100abe30, 0x100abdcc, 0x100abdc8, 0x0, 0x10165ac8, 0xa, 0x0, 0x1) ["/scratch2/openafs-1.2.10-rc4/src/rx/rx_lwp.c":189, 0x1003166c]
>    5 rx_ListenerProc(0x0, 0x70, 0xfb37600, 0x1c, 0x10165ac8, 0xa, 0x1, 0x1) ["/scratch2/openafs-1.2.10-rc4/src/rx/rx_lwp.c":319, 0x10031a54]
>    6 Create_Process_Part2(0x0, 0x1009fca8, 0xfb37600, 0x1c, 0x10165ac8, 0xa, 0x1, 0x1) ["/scratch2/openafs-1.2.10-rc4/src/lwp/lwp.c":740, 0x10035784]
>    7 savecontext(0x0, 0x0, 0x0, 0x1c, 0x10165ac8, 0x0, 0x1, 0x1) ["/scratch2/openafs-1.2.10-rc4/src/lwp/process.c":199, 0x100366c0]
>    8 <Unknown>() [< unknown >, 0xfcfdfeff]
> (dbx) l
> >* 799      if (--conn->peer->refCount <= 0) {
>    800          conn->peer->idleWhen = clock_Sec();
>    801          if (conn->peer->refCount < 0) {
>    802              conn->peer->refCount = 0;
>    803              MUTEX_ENTER(&rx_stats_mutex);
>    804              rxi_lowPeerRefCount ++;
>    805              MUTEX_EXIT(&rx_stats_mutex);
>    806          }
>    807      }
>    808      MUTEX_EXIT(&rx_peerHashTable_lock);
> (dbx)

(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:958 ,0x100210a8]
Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:958 ,0x100210a8]
 958  *conn_ptr = conn->next;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:964 ,0x100210b0]
Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:964 ,0x100210b0]
 964  if ( rxLastConn == conn )
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:965 +0x4,0x100210c4]
Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:965 +0x4,0x100210c4]
 965  rxLastConn = 0;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:970 +0x10,0x100210dc]
Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:970 +0x10,0x100210dc]
 970  rxevent_Cancel(conn->challengeEvent, (struct rx_call*)0, 0);
(dbx) list
>* 970          rxevent_Cancel(conn->challengeEvent, (struct rx_call*)0, 0);
   971      if (conn->checkReachEvent)
   972          rxevent_Cancel(conn->checkReachEvent, (struct rx_call*)0, 0);
   973
   974      /* Add the connection to the list of destroyed connections that
   975       * need to be cleaned up. This is necessary to avoid deadlocks
   976       * in the routines we call to inform others that this connection is
   977       * being destroyed. */
   978      conn->next = rx_connCleanup_list;
   979      rx_connCleanup_list = conn;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:972 +0x10,0x100210f4]
Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:972 +0x10,0x100210f4]
 972  rxevent_Cancel(conn->checkReachEvent, (struct rx_call*)0, 0);
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:978 ,0x100210fc]
Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:978 ,0x100210fc]
 978  conn->next = rx_connCleanup_list;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:979 ,0x10021104]
Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:979 ,0x10021104]
 979  rx_connCleanup_list = conn;
(dbx) print conn
Process 20942: Appropriate symbol not found for: conn
<symbol not found>
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:980 ,0x10021108]
Process 20942 (bosserver) stopped at [rxi_DestroyConnectionNoLock:980 ,0x10021108]
 980  }
(dbx) list
>* 980  }
   981
   982  /* Externally available version */
   983  void rx_DestroyConnection(conn)
   984      register struct rx_connection *conn;
   985  {
   986      SPLVAR;
   987
   988      NETPRI;
   989      AFS_RXGLOCK();
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnection:842 ,0x10020ef0]
Process 20942 (bosserver) stopped at [rxi_DestroyConnection:842 ,0x10020ef0]
 842  rxi_DestroyConnectionNoLock(conn);
(dbx) list
>* 842      rxi_DestroyConnectionNoLock(conn);
   843      /* conn should be at the head of the cleanup list */
   844      if (conn == rx_connCleanup_list) {
   845          rx_connCleanup_list = rx_connCleanup_list->next;
   846          MUTEX_EXIT(&rx_connHashTable_lock);
   847          rxi_CleanupConnection(conn);
   848      }
   849  #ifdef RX_ENABLE_LOCKS
   850      else {
   851          MUTEX_EXIT(&rx_connHashTable_lock);
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnection:847 ,0x10020f10]
Process 20942 (bosserver) stopped at [rxi_DestroyConnection:847 ,0x10020f10]
 847  rxi_CleanupConnection(conn);
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnection:845 ,0x10020f14]
Process 20942 (bosserver) stopped at [rxi_DestroyConnection:845 ,0x10020f14]
 845  rx_connCleanup_list = rx_connCleanup_list->next;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_DestroyConnection:847 ,0x10020f18]
Process 20942 (bosserver) stopped at [rxi_DestroyConnection:847 ,0x10020f18]
 847  rxi_CleanupConnection(conn);
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_CleanupConnection:783 ,0x10020cd0]
[4] Process 20942 (bosserver) stopped at [rxi_CleanupConnection:783 ,0x10020cd0]
 783  {
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_CleanupConnection:788 ,0x10020cd4]
Process 20942 (bosserver) stopped at [rxi_CleanupConnection:788 ,0x10020cd4]
 788  if (conn->type == RX_SERVER_CONNECTION && conn->service->destroyConnProc)
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_CleanupConnection:783 ,0x10020cd8]
Process 20942 (bosserver) stopped at [rxi_CleanupConnection:783 ,0x10020cd8]
 783  {
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_CleanupConnection:788 ,0x10020cdc]
Process 20942 (bosserver) stopped at [rxi_CleanupConnection:788 ,0x10020cdc]
 788  if (conn->type == RX_SERVER_CONNECTION && conn->service->destroyConnProc)
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_CleanupConnection:783 ,0x10020ce0]
Process 20942 (bosserver) stopped at [rxi_CleanupConnection:783 ,0x10020ce0]
 783  {
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_CleanupConnection:788 ,0x10020cf4]
Process 20942 (bosserver) stopped at [rxi_CleanupConnection:788 ,0x10020cf4]
 788  if (conn->type == RX_SERVER_CONNECTION && conn->service->destroyConnProc)
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_CleanupConnection:792 ,0x10020cfc]
Process 20942 (bosserver) stopped at [rxi_CleanupConnection:792 ,0x10020cfc]
 792  RXS_DestroyConnection(conn->securityObject, conn);
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:265 ,0x10040f74]
Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:265 ,0x10040f74]
 265  aconn->securityData = 0;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:268 +0x4,0x10040f84]
Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:268 +0x4,0x10040f84]
 268  rxkad_stats.destroyConn[rxkad_LevelIndex(sconn->level)]++;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:271 ,0x10040fb8]
Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:271 ,0x10040fb8]
 271  rock = sconn->rock;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:272 ,0x10041024]
Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:272 ,0x10041024]
 272  if (rock) rxi_Free (rock, sizeof(struct rxkad_serverinfo));
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_Free:2184 ,0x100227e8]
Process 20942 (bosserver) stopped at [rxi_Free:2184 ,0x100227e8]
2184  osi_Free(addr, size);
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_Free:2162 ,0x100227ec]
Process 20942 (bosserver) stopped at [rxi_Free:2162 ,0x100227ec]
2162  {
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_Free:2172 ,0x100227f4]
Process 20942 (bosserver) stopped at [rxi_Free:2172 ,0x100227f4]
2172  rxi_Alloccnt--; rxi_Allocsize -= size;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_Free:2184 ,0x10022808]
Process 20942 (bosserver) stopped at [rxi_Free:2184 ,0x10022808]
2184  osi_Free(addr, size);
(dbx) s
[5] Process 20942 (bosserver) stopped at [_free:905 ,0xfb1e93c]
Process 20942 (bosserver) stopped at [_free:905 ,0xfb1e93c]
         Source (of /xlv47/6.5.20f/work/irix/lib/libc/libc_n32_M3/gen/malloc.c) not available for Process 20942
(dbx) n
[5] Process 20942 (bosserver) stopped at [rxi_Free:2186 ,0x10022814]
Process 20942 (bosserver) stopped at [rxi_Free:2186 ,0x10022814]
2186  }
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:273 +0x4,0x10040fc8]
Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:273 +0x4,0x10040fc8]
 273  rxi_Free (sconn, sizeof(struct rxkad_sconn));
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_Free:2184 ,0x100227e8]
Process 20942 (bosserver) stopped at [rxi_Free:2184 ,0x100227e8]
2184  osi_Free(addr, size);
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_Free:2162 ,0x100227ec]
Process 20942 (bosserver) stopped at [rxi_Free:2162 ,0x100227ec]
2162  {
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_Free:2172 ,0x100227f4]
Process 20942 (bosserver) stopped at [rxi_Free:2172 ,0x100227f4]
2172  rxi_Alloccnt--; rxi_Allocsize -= size;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_Free:2184 ,0x10022808]
Process 20942 (bosserver) stopped at [rxi_Free:2184 ,0x10022808]
2184  osi_Free(addr, size);
(dbx) p size
Process 20942: Appropriate symbol not found for: size
<symbol not found>
(dbx) print size
Process 20942: Appropriate symbol not found for: size
<symbol not found>
(dbx) s
[5] Process 20942 (bosserver) stopped at [_free:905 ,0xfb1e93c]
Process 20942 (bosserver) stopped at [_free:905 ,0xfb1e93c]
         Source (of /xlv47/6.5.20f/work/irix/lib/libc/libc_n32_M3/gen/malloc.c) not available for Process 20942
(dbx) n
[5] Process 20942 (bosserver) stopped at [_free:906 +0xc,0xfb1e960]
Process 20942 (bosserver) stopped at [_free:906 +0xc,0xfb1e960]
         Source (of /xlv47/6.5.20f/work/irix/lib/libc/libc_n32_M3/gen/malloc.c) not available for Process 20942
(dbx) n
[5] Process 20942 (bosserver) stopped at [_free:907 ,0xfb1e964]
Process 20942 (bosserver) stopped at [_free:907 ,0xfb1e964]
         Source (of /xlv47/6.5.20f/work/irix/lib/libc/libc_n32_M3/gen/malloc.c) not available for Process 20942
(dbx) n
[5] Process 20942 (bosserver) stopped at [rxi_Free:2186 ,0x10022814]
Process 20942 (bosserver) stopped at [rxi_Free:2186 ,0x10022814]
2186  }
(dbx) list
>*2186  }
  2187
  2188  /* Find the peer process represented by the supplied (host,port)
  2189   * combination.  If there is no appropriate active peer structure, a
  2190   * new one will be allocated and initialized
  2191   * The origPeer, if set, is a pointer to a peer structure on which the
  2192   * refcount will be be decremented. This is used to replace the peer
  2193   * structure hanging off a connection structure */
  2194  struct rx_peer *rxi_FindPeer(host, port, origPeer, create)
  2195      register afs_uint32 host;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:295 +0x4,0x10040f48]
Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:295 +0x4,0x10040f48]
 295  aobj->refCount--;                 /* decrement connection counter */
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:301 ,0x10040f58]
Process 20942 (bosserver) stopped at [rxkad_DestroyConnection:301 ,0x10040f58]
 301  return 0;
(dbx) s
[5] Process 20942 (bosserver) stopped at [rxi_CleanupConnection:799 ,0x10020d20]
Process 20942 (bosserver) stopped at [rxi_CleanupConnection:799 ,0x10020d20]
 799  if (--conn->peer->refCount <= 0) {
(dbx) s
[5] Process 20942 (bosserver) stopped on signal SIGBUS: Bus error (default) at [rxi_CleanupConnection:799 +0x4,0x10020d24]
Process 20942 (bosserver) stopped on signal SIGBUS: Bus error (default) at [rxi_CleanupConnection:799 +0x4,0x10020d24]
 799  if (--conn->peer->refCount <= 0) {
(dbx)


-- 
Martin Mokrejs <mmokrejs@natur.cuni.cz>, <m.mokrejs@gsf.de>
PGP5.0i key is at http://www.natur.cuni.cz/~mmokrejs
MIPS / Institute for Bioinformatics <http://mips.gsf.de>
GSF - National Research Center for Environment and Health
Ingolstaedter Landstrasse 1, D-85764 Neuherberg, Germany
tel.: +49-89-3187 3683 , fax: +49-89-3187 3585