[OpenAFS] 1.3.79: LWP: stack overflow in process IO MANAGER!

Kevin openafs@gnosys.biz
Sun, 06 Mar 2005 16:42:23 -0500


Hi List-

I just built 1.3.79 on an i386 afs-systype (Celeron 2.53GHz) and can't
start the bosserver.  When I try, I get:

aphrodite bin # ./bosserver -noauth
stackcheck = 50462976: stack = 50462976
topstack = 0x808b5d0: stackptr = 0xb7e6e010: stacksize = 0x30000
Sun Mar  6 14:39:24 2005 LWP: stack overflow in process IO MANAGER!
Aborted

strace below in case it helps someone.  All Greek to me...

aphrodite bin # strace ./bosserver -noauth
execve("./bosserver", ["./bosserver", "-noauth"], [/* 45 vars */]) = 0
uname({sys="Linux", node="aphrodite", ...}) = 0
brk(0)                                  = 0x808b000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or
directory)
open("/etc/ld.so.cache", O_RDONLY)      = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=144116, ...}) = 0
mmap2(NULL, 144116, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7fc7000
close(3)                                = 0
open("/lib/libresolv.so.2", O_RDONLY)   = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0000%\0\000"...,
512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=64788, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0xb7fc6000
mmap2(NULL, 76052, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0)
= 0xb7fb3000
mmap2(0xb7fc2000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_DENYWRITE, 3, 0xe) = 0xb7fc2000
mmap2(0xb7fc4000, 6420, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_ANONYMOUS, -1, 0) = 0xb7fc4000
close(3)                                = 0
open("/lib/tls/libc.so.6", O_RDONLY)    = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0-R\1\000"...,
512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1190224, ...}) = 0
mmap2(NULL, 1125596, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3,
0) = 0xb7ea0000
mprotect(0xb7fac000, 27868, PROT_NONE)  = 0
mmap2(0xb7fad000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_DENYWRITE, 3, 0x10c) = 0xb7fad000
mmap2(0xb7fb1000, 7388, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|
MAP_ANONYMOUS, -1, 0) = 0xb7fb1000
close(3)                                = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0xb7e9f000
mprotect(0xb7fad000, 4096, PROT_READ)   = 0
set_thread_area({entry_number:-1 -> 6, base_addr:0xb7e9f6c0,
limit:1048575, seg_32bit:1, contents:0, read_exec_only:0,
limit_in_pages:1, seg_not_present:0, useable:1}) = 0
munmap(0xb7fc7000, 144116)              = 0
open("/dev/urandom", O_RDONLY)          = 3
read(3, "\1V\250\276", 4)               = 4
close(3)                                = 0
geteuid32()                             = 0
brk(0)                                  = 0x808b000
brk(0x80ac000)                          = 0x80ac000
mmap2(NULL, 200704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0xb7e6e000
fstat64(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 2), ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0xb7e6d000
write(1, "stackcheck = 50462976: stack = 5"..., 41stackcheck = 50462976:
stack = 50462976
) = 41
write(1, "topstack = 0x808b5d0: stackptr ="..., 65topstack = 0x808b5d0:
stackptr = 0xb7e6e010: stacksize = 0x30000
) = 65
time(NULL)                              = 1110137971
open("/etc/localtime", O_RDONLY)        = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=1267, ...}) = 0
fstat64(3, {st_mode=S_IFREG|0644, st_size=1267, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0xb7e6c000
read(3, "TZif\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\4\0"...,
4096) = 1267
close(3)                                = 0
munmap(0xb7e6c000, 4096)                = 0
write(2, "Sun Mar  6 14:39:31 2005", 24Sun Mar  6 14:39:31 2005) = 24
write(2, " LWP: stack overflow in process ", 32 LWP: stack overflow in
process ) = 32
write(2, "IO MANAGER", 10IO MANAGER)              = 10
write(2, "!\n", 2!
)                      = 2
rt_sigprocmask(SIG_UNBLOCK, [ABRT], NULL, 8) = 0
gettid()                                = 26257
tgkill(26257, 26257, SIGABRT)           = 0
--- SIGABRT (Aborted) @ 0 (0) ---
+++ killed by SIGABRT +++


This is on an x86 Gentoo system recently built from sources and current
(or more recent) portage.  The ebuild is one that worked fine for me in
building client software on a Gentoo ppc linux system (well, it built
the whole package, but I've only used the client-side software in ppc
linux).  The kernel is 2.6.10-gentoo-r6.

I'm currently running OAFS 1.2.11 (built from source) on an x86 SuSE 9.0
box as the sole OAFS server in the network and that's been working fine
for nearly a year now, serving up one cell and files to Mac OSX, x86
Linux 2.4 and 2.6, and ppc Linux 2.6.  Now I'm building OAFS 1.3.79 for
another x86 box to be an additional file server (and perhaps other
functions) on the network.  I asked recently on -devel about the
advisability of mixing 1.2.recent OAFS with 1.3.recent OAFS servers, and
one person replied saying that he'd had no problems doing this.  I doubt
that this stack overflow is related to the existence of the other
server, but I'm not sure how to determine this with certainty.  I read in
the archives of this list about other stack overflows and someone
obtained a stack dump and included it, but I'm not sure how to obtain a
stack dump for myself.

Any suggestions on how to resolve this?

TIA.


-- 
-Kevin
http://www.gnosys.us