[OpenAFS] Problems with 1.4.1 (built 2006-04-11) on AIX 5.3 ML03

Tomasz Skarupa tomasz.skarupa@gmail.com
Fri, 09 Jun 2006 18:10:47 +0200


I help in the administration of the cell enea.it.

We are migrating our AIX fileservers [AIX 5.3 ML03] from AFS Transarc
3.6 2.38 (inode) to OpenAFS (namei).

OpenAFS 1.4.0 works well, but we have also tried version 1.4.1
(built 2006-04-11), and with it we have two problems:

1) frequent [every 2-3 days] coredumps:
Segmentation fault in rxkad_DecryptPacket

The output of dbx (run with the showProcInfo script) is shown at the end of
this e-mail.

2) after about a day it stops responding to vos listvol commands, although
bos status results are normal.

Is anybody using OpenAFS 1.4.1 successfully on AIX 5.3?
How can we help diagnose the problem?

Thanks.

Tomasz Skarupa

----

bash-3.00# ./showProcInfo corefile.fs fileserver
showProcInfo: corefile.fs appears to be the core file
showProcInfo: fileserver appears to be the executable
showProcInfo: Is this correct? [y] y
showProcInfo: Switched file interpretation
Information for fileserver ven  9 giu 13.08.52 2006

Type 'help' for help.
[using memory image in corefile.fs]
reading symbolic information ...warning: no source compiled with -g


Segmentation fault in rxkad_DecryptPacket at 0x10027be4 ($t5)
0x10027be4 (rxkad_DecryptPacket+0x78) 881e2f30        lbz   r0,0x2f30(r30)
 thread  state-k     wchan    state-u    k-tid   mode held scope function
 $t1     run                  running   393235     u   no   pro  _p_nsleep
 $t2     run                  running   729325     u   no   pro
_event_sleep
 $t3     run                  running   467039     u   no   pro
rxi_Recvmsg
 $t4     run                  blocked   553077     u   no   pro
_event_sleep
>$t5     run                  running   426021     k   no   pro  
rxkad_DecryptPacket
 $t6     run                  blocked              u   no   pro
_usched_swtch
 $t7     run                  blocked              u   no   pro
_usched_swtch
 $t8     run                  blocked              u   no   pro
_usched_swtch
 $t9     run                  blocked              u   no   pro
_usched_swtch
 $t10    run                  blocked              u   no   pro
_usched_swtch
 $t11    run                  blocked              u   no   pro
_usched_swtch
 $t12    run                  blocked              u   no   pro
_usched_swtch
 $t13    run                  blocked              u   no   pro
_usched_swtch
 $t14    run                  blocked              u   no   pro
_usched_swtch
 $t15    run                  blocked              u   no   pro
_usched_swtch
 $t16    run                  running   286939     u   no   pro
__fd_select
 $t18    run                  running   475199     u   no   pro  _p_nsleep
 $t19    run                  running   446575     u   no   pro  _p_nsleep
 $t20    run                  blocked   462873     u   no   pro
_event_sleep

19 threads

=== Thread 1 ===

_p_nsleep(??, ??) at 0xd01266a8
raise.nsleep(??, ??) at 0xd0336df4
sleep(??) at 0xd036a50c
main() at 0x10000ddc

=== Thread 2 ===

_event_sleep(??, ??, ??, ??, ??, ??) at 0xd0121404
_p_sigtimedwait(??, ??, ??) at 0xd0126120
pth_signal.sigwait(??, ??) at 0xd0127198
softsig_thread() at 0x10038e9c

=== Thread 3 ===

rxi_Recvmsg() at 0x10012668
rxi_ReadPacket() at 0x1000fcc4
rxi_ListenerProc() at 0x10012114
rx_ListenerProc() at 0x100123ec

=== Thread 4 ===

_event_sleep(??, ??, ??, ??, ??, ??) at 0xd01213e0
_event_wait(??, ??) at 0xd0121944
_cond_wait_local(??, ??, ??) at 0xd012b8e4
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_timedwait(??, ??, ??) at 0xd012c7cc
event_handler() at 0x10012330

=== Thread 5 ===

rxkad_DecryptPacket() at 0x10027be4
rxkad_CheckPacket() at 0x1000b3a4
rxi_ReadProc() at 0x10026814
rx_ReadProc32() at 0x100264d8
xdrrx_getint32() at 0x10049df8
xdr.xdr_int() at 0x1004a7f4
RXAFS_ExecuteRequest() at 0x10062d0c
rxi_ServerProc() at 0x1001fdfc
rx_ServerProc() at 0x10012f88
server_entry() at 0x10012360

=== Thread 6 ===

_usched_swtch(??) at 0xd01211e8
_event_wait(??, ??) at 0xd0121908
_cond_wait_local(??, ??, ??) at 0xd012b904
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_wait(??, ??) at 0xd012ca40
rx_GetCall() at 0x1001f870
rxi_ServerProc() at 0x1001fd28
rx_ServerProc() at 0x10012f88
server_entry() at 0x10012360

=== Thread 7 ===

_usched_swtch(??) at 0xd01211e8
_event_wait(??, ??) at 0xd0121908
_cond_wait_local(??, ??, ??) at 0xd012b904
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_wait(??, ??) at 0xd012ca40
rx_GetCall() at 0x1001f870
rxi_ServerProc() at 0x1001fd28
rx_ServerProc() at 0x10012f88
server_entry() at 0x10012360

=== Thread 8 ===

_usched_swtch(??) at 0xd01211e8
_event_wait(??, ??) at 0xd0121908
_cond_wait_local(??, ??, ??) at 0xd012b904
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_wait(??, ??) at 0xd012ca40
rx_GetCall() at 0x1001f870
rxi_ServerProc() at 0x1001fd28
rx_ServerProc() at 0x10012f88
server_entry() at 0x10012360

=== Thread 9 ===

_usched_swtch(??) at 0xd01211e8
_event_wait(??, ??) at 0xd0121908
_cond_wait_local(??, ??, ??) at 0xd012b904
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_wait(??, ??) at 0xd012ca40
rx_GetCall() at 0x1001f870
rxi_ServerProc() at 0x1001fd28
rx_ServerProc() at 0x10012f88
server_entry() at 0x10012360

=== Thread 10 ===

_usched_swtch(??) at 0xd01211e8
_event_wait(??, ??) at 0xd0121908
_cond_wait_local(??, ??, ??) at 0xd012b904
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_wait(??, ??) at 0xd012ca40
rx_GetCall() at 0x1001f870
rxi_ServerProc() at 0x1001fd28
rx_ServerProc() at 0x10012f88
server_entry() at 0x10012360

=== Thread 11 ===

_usched_swtch(??) at 0xd01211e8
_event_wait(??, ??) at 0xd0121908
_cond_wait_local(??, ??, ??) at 0xd012b904
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_wait(??, ??) at 0xd012ca40
rx_GetCall() at 0x1001f870
rxi_ServerProc() at 0x1001fd28
rx_ServerProc() at 0x10012f88
server_entry() at 0x10012360

=== Thread 12 ===

_usched_swtch(??) at 0xd01211e8
_event_wait(??, ??) at 0xd0121908
_cond_wait_local(??, ??, ??) at 0xd012b904
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_wait(??, ??) at 0xd012ca40
rx_GetCall() at 0x1001f870
rxi_ServerProc() at 0x1001fd28
rx_ServerProc() at 0x10012f88
server_entry() at 0x10012360

=== Thread 13 ===

_usched_swtch(??) at 0xd01211e8
_event_wait(??, ??) at 0xd0121908
_cond_wait_local(??, ??, ??) at 0xd012b904
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_wait(??, ??) at 0xd012ca40
rx_GetCall() at 0x1001f870
rxi_ServerProc() at 0x1001fd28
rx_ServerProc() at 0x10012f88
server_entry() at 0x10012360

=== Thread 14 ===

_usched_swtch(??) at 0xd01211e8
_event_wait(??, ??) at 0xd0121908
_cond_wait_local(??, ??, ??) at 0xd012b904
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_wait(??, ??) at 0xd012ca40
rx_GetCall() at 0x1001f870
rxi_ServerProc() at 0x1001fd28
rx_ServerProc() at 0x10012f88
server_entry() at 0x10012360

=== Thread 15 ===

_usched_swtch(??) at 0xd01211e8
_event_wait(??, ??) at 0xd0121908
_cond_wait_local(??, ??, ??) at 0xd012b904
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_wait(??, ??) at 0xd012ca40
rx_GetCall() at 0x1001f870
rxi_ServerProc() at 0x1001fd28
rx_ServerProc() at 0x10012f88
server_entry() at 0x10012360

=== Thread 16 ===

__fd_select(??, ??, ??, ??, ??) at 0xd03a6ed4
partition.select() at 0x100890b4
FSYNC_sync() at 0x10088c58

=== Thread 18 ===

_p_nsleep(??, ??) at 0xd01266a8
raise.nsleep(??, ??) at 0xd0336df4
sleep(??) at 0xd036a50c
FiveMinuteCheckLWP() at 0x1000328c

=== Thread 19 ===

_p_nsleep(??, ??) at 0xd01266a8
raise.nsleep(??, ??) at 0xd0336df4
sleep(??) at 0xd036a50c
HostCheckLWP() at 0x100031f8

=== Thread 20 ===

_event_sleep(??, ??, ??, ??, ??, ??) at 0xd01213e0
_event_wait(??, ??) at 0xd0121944
_cond_wait_local(??, ??, ??) at 0xd012b8e4
_cond_wait(??, ??, ??) at 0xd012bef4
pthread_cond_timedwait(??, ??, ??) at 0xd012c7cc
FsyncCheckLWP() at 0x10003124