[OpenAFS-devel] more on the 2.2.18pre17 SMP cpu hog/etc.

Donna Baird nneul@umr.edu
Thu, 30 Nov 2000 16:54:07 -0600


Well, I tried it again with that patch for the unload/memory issue, and
here's what happens.

Basically, as soon as afsd attempts to start, it starts sucking pretty
much all the cpu in the machine, but isn't doing much of anything at
all.

See below for details.

I should point out, I've got openafs running just fine on several
machines, but this home machine of mine is the one causing a severe
problem.

-- Nathan

------------------------------------------------------------
Nathan Neulinger                       EMail:  nneul@umr.edu
University of Missouri - Rolla         Phone: (573) 341-4841
CIS - Systems Programming                Fax: (573) 341-4216


This is a snapshot from yamm (the initial afsd is gone cause I killed
it) - note, it appears that the afsd that are still sucking cpu do not
kill the machines performance, but the one that I killed (the initial
parent) does bring the machine to almost a grinding halt.

[16:39:21] Load averages: 5.08  2.71  1.08.  Pages: 0 -   0.00% Idle 
52 Processes: 46 Sleeping, 6 Running, 0 Stopped, 0 Zombie, 0 Others.
Memory: 504.74M  ( 153.28M ) real,  517.67M  (      0K ) virtual,
869.14M free

SWN USER        UID   PID  NI PRI    SIZE     RSS  %WCPU     WCHAN     
CMD
R   root          0   915   0  20   1.51M    808K 33.00% 0x0000000000
afsd
R   root         0   920   0  11   1.51M    808K 32.66% 0x0000000000
afsd
R   root          0   919   0  10   1.51M    808K 28.57% 0x0000000000
afsd
R   root          0   917   0  18   1.51M    808K 28.00% 0x0000000000
afsd
R   root          0   918   0  19   1.51M    808K 27.85% 0x0000000000
afsd
R   root          0   940   0  12   1.87M   1.25M 14.66% 0x0000000000
yamm
S   root          0   639   0   3 275.83M  10.41M  9.33%    do_select
X         
S   root          0   728   0   5   4.92M   3.14M  6.71%    do_select
enlightenm
S   root          0   942   0   0   5.06M   3.10M  5.83%    do_select
nedit     
S   root          0   830   0   0  23.10M  15.80M  1.33%    do_select
netscape  
S   root          0   763   0   1   2.94M   1.25M  1.16%    do_select
E-Clock.ep
...

Now, the output from:

strace -v -f -o /tmp/afsd.strace.out /usr/vice/etc/afsd -stat 300
-dcache 100 -daemons 2 -volumes 50 -verbose > & /tmp/afsd.out

---------------
<my cell is umr.edu>
<normal banner>
Process 914 attached
Process 915 attached
Process 916 attached
Process 917 attached
Process 918 attached
Process 919 attached
Process 920 attached
SScall(137, 28)=0 afsd: Forking rx listener daemon.
afsd: Forking rx callback listener.
afsd: Forking rxevent daemon.
afsd: Forking AFS daemon.
afsd: Forking Check Server Daemon.
afsd: Forking 2 background daemons.
SScall(137, 28)=0 SScall(137, 28)=0 SScall(137, 28)=0 SScall(137, 28)=0
SScall(1
...
<about 30-40 lines of the 137,28,0 ones>
SScall(137, 28)=0 afsd: Calling AFSOP_CACHEINIT: 300 stat cache entrie
s, 35000 optimum cache files, 350000 blocks in the cache, flags = 0x0,
dcache en
tries 100
SScall(137, 28)=0 afsd: Sweeping workstation's AFS cache directory.
afsd: 35000 out of 35000 data cache files found in sweep 1.
SScall(137, 28)=0 SScall(137, 28)=0 SScall(137, 28)=0 SScall(137, 28)=0
SScall(1
...
<about 30-40 lines of the 137,28,0 stuff until I killed the afsd.
------------------

now, the output from the strace itself:

-----------------
<lots of setup stuff>
913   open("/usr/vice/etc/AFSLog", O_RDWR) = 4
913   close(4)                          = 0
913   mmap(0, 143360, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0x4010e000
913   write(1, "afsd: My home cell is \'umr.edu\'"..., 1030) = 1030
913   socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 4
913   ioctl(4, SIOCGIFCONF, 0xbffeec98) = 0
913   ioctl(4, SIOCGIFFLAGS, 0xbffeecc0) = 0
913   ioctl(4, SIOCGIFFLAGS, 0xbffeece0) = 0
913   ioctl(4, SIOCGIFNETMASK, 0xbffeece0) = 0
913   ioctl(4, SIOCGIFMTU, 0xbffeece0)  = 0
913   close(4)                          = 0
913   open("/usr/vice/etc/NetInfo", O_RDONLY) = -1 ENOENT (No such file
or directory)
913   socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 4
913   ioctl(4, SIOCGIFCONF, 0xbffeec5c) = 0
913   ioctl(4, SIOCGIFFLAGS, 0xbffeec84) = 0
913   ioctl(4, SIOCGIFFLAGS, 0xbffeeca4) = 0
913   ioctl(4, SIOCGIFNETMASK, 0xbffeeca4) = 0
913   ioctl(4, SIOCGIFMTU, 0xbffeeca4)  = 0
913   close(4)                          = 0
913   open("/usr/vice/etc/NetRestrict", O_RDONLY) = -1 ENOENT (No such
file or directory)
913   afs_syscall(0x1c, 0x11, 0x1, 0xbfffe9b8, 0xbfffd9b8) = 0
913   fork()                            = 914
914   setpriority(PRIO_PROCESS, 0, -10 <unfinished ...>
913   fork( <unfinished ...>
914   <... setpriority resumed> )       = 0
913   <... fork resumed> )              = 915
914   afs_syscall(0x1c, 0x30, 0x190, 0, 0 <unfinished ...>
915   afs_syscall(0x1c, 0, 0x190, 0x806cd6c, 0 <unfinished ...>
913   fork()                            = 916
913   fork( <unfinished ...>
916   setpriority(PRIO_PROCESS, 0, -10 <unfinished ...>
913   <... fork resumed> )              = 917
916   <... setpriority resumed> )       = 0
916   afs_syscall(0x1c, 0x13, 0x806cd6c, 0, 0x806cd80 <unfinished ...>
913   fork()                            = 918
917   afs_syscall(0x1c, 0x1, 0x806cd6c, 0, 0x806cd80 <unfinished ...>
918   afs_syscall(0x1c, 0x4, 0x806cd6c, 0, 0x806cd80 <unfinished ...>
913   fork()                            = 919
913   fork()                            = 920
919   afs_syscall(0x1c, 0x2, 0x806cd6c, 0, 0x806cd80 <unfinished ...>
913   afs_syscall(0x1c, 0x1d, 0xbfffc548, 0x808a404, 0x2) = 0
920   afs_syscall(0x1c, 0x2, 0x806cd6c, 0, 0x806cd80 <unfinished ...>
913   afs_syscall(0x1c, 0x1d, 0xbfffc548, 0x808a134, 0x2) = 0
913   afs_syscall(0x1c, 0x1d, 0xbfffc548, 0x8089e64, 0x2) = 0
913   afs_syscall(0x1c, 0x1d, 0xbfffc548, 0x806d994, 0x2) = 0
<tons of those with incrementing value>
913   afs_syscall(0x1c, 0x1d, 0xbfffc548, 0x806d6c4, 0x2) = 0
913   afs_syscall(0x1c, 0x1d, 0xbfffc548, 0x806d3f4, 0x1) = 0
913   afs_syscall(0x1c, 0x6, 0x806ab40, 0x806cd6c, 0) = 0
913   chmod("/afscache", 0700)          = 0
913   open("/dev/null", O_RDONLY|O_NONBLOCK|0x10000) = -1 ENOTDIR (Not a
directory)
913   open("/afscache", O_RDONLY|O_NONBLOCK|0x10000) = 4
913   fstat(4, {st_dev=makedev(3, 6), st_ino=170304,
st_mode=S_IFDIR|0700, st_nlink=2, st_uid=0, st_gid=0, s
t_blksize=4096, st_blocks=1483, st_size=758978,
st_atime=100/10/31-17:27:58, st_mtime=100/10/31-17:30:03, st
_ctime=100/11/30-16:35:38}) = 0
913   fcntl(4, F_SETFD, FD_CLOEXEC)     = 0
913   brk(0x808d000)                    = 0x808d000
913   getdents(4, {{d_ino=170304, d_off=2, d_reclen=12, d_name=""}
{d_ino=2, d_off=175488, d_reclen=16, d_na
me="."} {d_ino=170313, d_off=175744, d_reclen=16, d_name="0"}
{d_ino=170314, d_off=175872, d_reclen=16, d_na
me="1"} {d_ino=170315, d_off=176000, d_reclen=16, d_name="2"}
{d_ino=170316, d_off=176256, d_reclen=16, d_na

<tons of these, counting up to 35000 stepping through the files in
afscache dir>

f=444506368, d_reclen=20, d_name="34996"} {d_ino=316778,
d_off=444506496, d_reclen=20, d_name="34997"} {d_in
o=316779, d_off=444506624, d_reclen=20, d_name="34998"} {d_ino=316780,
d_off=1372972800, d_reclen=20, d_name
="34999"} {d_ino=170312, d_off=2113294464, d_reclen=24,
d_name="olumeItems"} {d_ino=170311, d_off=2113294465
, d_reclen=24, d_name="acheItems"}}, 3933) = 328
913   getdents(4, {}, 3933)             = 0
913   close(4)                          = 0
913   afs_syscall(0x1c, 0x7, 0x806af80, 0x806cd6c, 0) = 0
913   afs_syscall(0x1c, 0x8, 0x8069720, 0x806cd6c, 0) = 0
913   afs_syscall(0x1c, 0xb, 0x806a740, 0x806cd6c, 0) = 0
<tons of these, with incrementing value, but after a while they start
coming really slowly, which is about when the machine is getting ground
to a halt.>

-- Nathan