[OpenAFS] RH9, 2.4.20-30.9smp, AFS 1.2.13 afsd lockup

Mike Polek mike@pictage.com
Wed, 22 Dec 2004 18:29:54 -0800


Hi, y'all,

I attempted to upgrade to 1.2.13 on a RedHat 9 box
with a 2.4.20-30.9smp kernel. Everything was fine on
the uniprocessor kernel. But I get complete lockup
of the machine requiring me to do a hardware reset
with the smp kernel.

I rebuilt the kernel, the afs kernel module, and
the whole set of afs rpms on the box, and then
re-upgraded the afsd from the openafs-client rpm.
Same behavior.

I use a memory cache; the afsd options are: -memcache -blocks 51200

If I use a disk cache, it doesn't lock up immediately,
but it doesn't actually do anything either, and the
machine eventually locks up.

Has anybody experienced similar behavior anywhere
and solved it?



On another note, when I upgraded my RH7.3 uniprocessor
box to 1.2.13, I noticed that it manifested a problem
with not shutting down properly. Apparently the
SIGQUIT doesn't get to the right place. I saw this on
RH8.0 when it first came out, and an upgrade of glibc
fixed it. Apparently it was a threading issue.

My workaround is to patch the stop) section of the
init script and add

(sleep 10; killall -QUIT fileserver) < /dev/null >& /dev/null &

Right before the

/usr/bin/bos shutdown localhost -localauth -wait

line. That sends the SIGQUIT to the fileserver, which is
something the bosserver is supposed to do itself. Has anybody
experienced this one and know of an appropriate fix?

Thanks,
Mike Polek
Pictage, Inc.
mike at pictage dot com

--------- strace of afsd --------------
# strace -c -d /usr/vice/etc/afsd -memcache -blocks 51200
execve("/usr/vice/etc/afsd", ["/usr/vice/etc/afsd", "-memcache", "-blocks", "51200"], [/* 27 vars */]) = 0
  [wait(0x137f) = 909]
pid 909 stopped, [SIGSTOP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
stray syscall exit: eax = 0
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]
  [wait(0x57f) = 909]
pid 909 stopped, [SIGTRAP]


--- lockup ---