[OpenAFS] Linux kernels oops with openafs?

Andrew Leahy aleahy@knox.edu
Thu, 17 Oct 2002 11:17:50 -0400


This is a multi-part message in MIME format.
--------------E5D5EA556B1BAC01FAB34051
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit


Hello,

I've been working to install an openafs server for the first time (RH
Linux 7.2/kernel 2.4.9-34/Openafs 1.2.6) and I've been seeing some
kernel oopses. (See the attached file.)

I'm not certain what is causing this, but I've been writing a script
(also attached) to automate the basic configuration of the openafs
server.  The script loads and unloads the libopenafs modules and starts
up the servers a couple of times.  The script seems to work fine (in the
sense that I can see what I should see in /afs when it's done), but
would, say, loading and unloading the libopenafs modules cause problems?

The only overt symptom of the problem is that when it happens it's
impossible to kill the afsd processes short of a reboot.  I've also been
experiencing problems where I can authenticate successfully against the
kaserver from another client, but I can't see anything in /afs on the
client. (In fact, I've never been able to see anything in /afs from the
client system. I don't know if this is related.)

Does anybody know what to make of this?

Thanks for your help.

Andrew Leahy
--------------E5D5EA556B1BAC01FAB34051
Content-Type: text/plain; charset=us-ascii;
 name="openafs-oops.txt"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="openafs-oops.txt"

Oct 16 16:20:56 pc14341 kernel: WARM shutting down of: CB... afs... BkG... CTrunc... AFSDB... RxEvent... RxListener... 
Oct 16 16:26:37 pc14341 kernel: Starting AFS cache scan...found 0 non-empty cache files (0%%).
Oct 16 16:29:51 pc14341 kernel: IPUT Bad refCount 0 on inode 0xcc99e000
Oct 16 16:29:51 pc14341 kernel: Unable to handle kernel paging request at virtual address ffffffff
Oct 16 16:29:51 pc14341 kernel:  printing eip:
Oct 16 16:29:51 pc14341 kernel: cc8c0b68
Oct 16 16:29:51 pc14341 kernel: *pde = 00001063
Oct 16 16:29:51 pc14341 kernel: *pte = 00000000
Oct 16 16:29:51 pc14341 kernel: Oops: 0002
Oct 16 16:29:51 pc14341 kernel: Kernel 2.4.9-34
Oct 16 16:29:51 pc14341 kernel: CPU:    0
Oct 16 16:29:51 pc14341 kernel: EIP:    0010:[<cc8c0b68>]    Tainted: PF
Oct 16 16:29:51 pc14341 kernel: EFLAGS: 00010286
Oct 16 16:29:51 pc14341 kernel: EIP is at osi_Panic [libafs-2.4.9-34-i386] 0x28 
Oct 16 16:29:51 pc14341 kernel: eax: 00000028   ebx: cc99e000   ecx: 00000007   edx: cbfe5090
Oct 16 16:29:51 pc14341 kernel: esi: cc99e000   edi: cc8e4e20   ebp: cc8e4dec   esp: c3c09efc
Oct 16 16:29:51 pc14341 kernel: ds: 0018   es: 0018   ss: 0018
Oct 16 16:29:51 pc14341 kernel: Process umount (pid: 1948, stackpage=c3c09000)
Oct 16 16:29:51 pc14341 kernel: Stack: cc8ca045 cc8dabc0 00000000 cc99e000 cc99e010 c1691450 c1691484 c2dcd120 
Oct 16 16:29:51 pc14341 kernel:        c0142790 cc99e000 cc99e000 c2dcd120 c2dcd120 cc8e4e20 cc8e4dec c94b0800 
Oct 16 16:29:51 pc14341 kernel:        c2dcd120 c0136d5f c2dcd120 c2dcd120 c3c09f88 00000000 c4a17000 08053bd0 
Oct 16 16:29:51 pc14341 kernel: Call Trace: [<cc8ca045>] osi_iput [libafs-2.4.9-34-i386] 0x29 
Oct 16 16:29:51 pc14341 kernel: [<cc8dabc0>] __insmod_libafs-2.4.9-34-i386_S.rodata_L2024 [libafs-2.4.9-34-i386] 0x3dc0 
Oct 16 16:29:51 pc14341 kernel: [dput+236/364] dput [kernel] 0xec 
Oct 16 16:29:51 pc14341 kernel: [<c0142790>] dput [kernel] 0xec 
Oct 16 16:29:51 pc14341 kernel: [<cc8e4e20>] afs_sops [libafs-2.4.9-34-i386] 0x0 
Oct 16 16:29:51 pc14341 kernel: [<cc8e4dec>] afs_file_system [libafs-2.4.9-34-i386] 0x0 
Oct 16 16:29:51 pc14341 kernel: [kill_super+91/324] kill_super [kernel] 0x5b 
Oct 16 16:29:51 pc14341 kernel: [<c0136d5f>] kill_super [kernel] 0x5b 
Oct 16 16:29:51 pc14341 kernel: [path_release+39/48] path_release [kernel] 0x27 
Oct 16 16:29:51 pc14341 kernel: [<c013a9db>] path_release [kernel] 0x27 
Oct 16 16:29:51 pc14341 kernel: [do_umount+176/204] do_umount [kernel] 0xb0 
Oct 16 16:29:51 pc14341 kernel: [<c014670c>] do_umount [kernel] 0xb0 
Oct 16 16:29:51 pc14341 kernel: [sys_umount+201/228] sys_umount [kernel] 0xc9 
Oct 16 16:29:51 pc14341 kernel: [<c01467f1>] sys_umount [kernel] 0xc9 
Oct 16 16:29:51 pc14341 kernel: [sys_oldumount+11/16] sys_oldumount [kernel] 0xb 
Oct 16 16:29:51 pc14341 kernel: [<c0146817>] sys_oldumount [kernel] 0xb 
Oct 16 16:29:51 pc14341 kernel: [system_call+51/56] system_call [kernel] 0x33 
Oct 16 16:29:51 pc14341 kernel: [<c0106e17>] system_call [kernel] 0x33 
Oct 16 16:29:51 pc14341 kernel: 
Oct 16 16:29:51 pc14341 kernel: 
Oct 16 16:29:51 pc14341 kernel: Code: c6 05 ff ff ff ff 2a c3 55 57 56 53 56 8b 7c 24 1c 83 ff 01 

--------------E5D5EA556B1BAC01FAB34051
Content-Type: text/plain; charset=us-ascii;
 name="script.txt"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="script.txt"

#!/bin/sh
#
# Set up some basic variables
#

THISSYS=
THISIP=
THISCELL=
THISNAME=
THISDRV=
THISPASS=
SRCDIR=
RHRLS=

#
# Install the basic RPM's
#
cd ${SRCDIR}/${RHRLS}
rpm -Uvh openafs-1.2.6-rh7.2.1.i386.rpm
rpm -Uvh openafs-kernel-1.2.6-rh7.2.1.i386.rpm
rpm -Uvh openafs-client-1.2.6-rh7.2.1.i386.rpm

##
## Client Configuration
##
# Set the AFS cache (for the cachemanager) to a larger size
#
cat <<EOF > /usr/vice/etc/cacheinfo
/afs:/usr/vice/cache:2000000
EOF

echo "Don't Worry! I'm changing the cache size in cacheinfo to 2 GB"
echo ""
echo "Don't Worry! I'm ignoring authentication for now"

# Configure ThisCell file (for the client)
#
cat <<EOF > /usr/vice/etc/ThisCell
$THISCELL
EOF

# Configure CellServDB file (for the client)
#
cat <<EOF >> /usr/vice/etc/CellServDB
>$THISCELL           #$THISNAME
$THISIP                  #$THISSYS
EOF

##
## Authentication configuration stuff for PAM, etc. would go here !!!!!!
##

##
## Install and configure the server 
##
# Install the server RPM
#

rpm -Uvh openafs-server-1.2.6-rh7.2.1.i386.rpm

# Configure and start the afs server
#
echo "Don't Worry! I'm modifying /etc/sysconfig/afs to turn AFS_SERVER on"
echo ""

cp -f $SRCDIR/config/sysconfig-afs-server /etc/sysconfig/afs
MODNAME=`/usr/vice/etc/afsmodname`
/sbin/insmod -f /usr/vice/etc/modload/$MODNAME

# Make and mount the appropriate vicepX partitions
#
/sbin/mke2fs $THISDRV

/bin/mkdir /vicepa
/bin/cat <<EOF >> /etc/fstab
$THISDRV               /vicepa                 ext2    defaults        0 2
EOF

/bin/mount -a 

# start and configure the BOS server
#
/usr/afs/bin/bosserver -noauth &
/usr/bin/bos setcellname $THISSYS $THISCELL -noauth

#
# configure the other (non-fileserver) processes on this server
#

#
# the authentication server . . .
/usr/bin/bos create $THISSYS kaserver simple /usr/afs/bin/kaserver \
	-cell $THISCELL -noauth

# the backup server . . . 
/usr/bin/bos create $THISSYS buserver simple /usr/afs/bin/kaserver \
	-cell $THISCELL -noauth

# the protection server . . . 
/usr/bin/bos create $THISSYS ptserver simple /usr/afs/bin/ptserver \
	-cell $THISCELL -noauth

# the volumne location server
/usr/bin/bos create $THISSYS vlserver simple /usr/afs/bin/vlserver \
	-cell $THISCELL -noauth

# Create the initial afs and admin accounts in AFS
#
/usr/sbin/kas create -name afs -initial_password $THISPASS -noauth
/usr/sbin/kas create -name admin -initial_password $THISPASS -noauth
/usr/sbin/kas setfields -name admin -flags ADMIN -noauth
/usr/bin/bos adduser $THISSYS admin -cell $THISCELL -noauth
/usr/bin/bos addkey $THISSYS -key $THISPASS -kvno 0 -cell $THISCELL -noauth

echo "about to pts createuser . . . sleeping 10 seconds"
sleep 10

# Configure group membership for admin
#
/usr/bin/pts createuser -name admin -cell $THISCELL -noauth
/usr/bin/pts adduser -user admin -group system:administrators \
	-cell $THISCELL -noauth

# restart the database servers to take advantage of the new key
#
/usr/bin/bos restart -server $THISSYS -all -cell $THISCELL -noauth

sleep 2
#
# Start file server processes
#
/usr/bin/bos create $THISSYS fs fs /usr/afs/bin/fileserver \
	/usr/afs/bin/volserver /usr/afs/bin/salvager \
	-cell $THISCELL -noauth

echo "performing vos create command . . . sleeping 10 seconds"
sleep 10

# Create the root AFS volume root.afs
#
/usr/sbin/vos create $THISSYS /vicepa root.afs \
	-cell $THISCELL -noauth

#
# Start the update server processes
#
/usr/bin/bos create $THISSYS upserver simple \
	"/usr/afs/bin/upserver -crypt /usr/afs/etc" \
	-cell $THISCELL -noauth

##
## Restart the servers here: afsd must function to issue fs commands
##

echo "restarting servers . . . sleeping 4 seconds"
sleep 4

# kill all the afs processes
#
kill -9 `ps ax | grep afs | awk '{print $1}'`
sleep 2

# make sure they are really dead
#
kill -9 `ps ax | grep afs | awk '{print $1}'`
sleep 2
/sbin/rmmod `/sbin/lsmod | grep libafs | awk '{print $1}'`

/etc/rc.d/init.d/afs start
klog admin -password $THISPASS

#
# Configure a skeletal AFS structure--including access control
#

# access control and replication for the root filesystem
#
/usr/bin/fs setacl /afs system:anyuser rl
/usr/sbin/vos addsite $THISSYS /vicepa root.afs
/usr/sbin/vos release root.afs
/usr/bin/fs checkvolumes

# Create and configure the read-only copy of the root cell volume 
#
/usr/sbin/vos create $THISSYS /vicepa root.cell
/usr/bin/fs mkmount /afs/$THISCELL root.cell
/usr/bin/fs setacl /afs/$THISCELL system:anyuser rl

# Create and configure a (hidden) read-write copy of the root cell
#
/usr/bin/fs mkmount /afs/.$THISCELL root.cell -rw
/usr/sbin/vos addsite $THISSYS /vicepa root.cell
/usr/sbin/vos release root.cell
/usr/bin/fs checkvolumes

# Create and configure a home filesystem
#
/usr/sbin/vos create $THISSYS /vicepa root.home
/usr/bin/fs mkmount /afs/${THISCELL}/home root.home
/usr/bin/fs setacl /afs/${THISCELL}/home system:authuser rl

# Create and configure a shared filesystem
#
/usr/sbin/vos create $THISSYS /vicepa root.shared
/usr/bin/fs mkmount /afs/${THISCELL}/shared root.shared
/usr/bin/fs setacl /afs/${THISCELL}/shared system:anyuser rl

#
# What to do next?  protection groups, users . . . 

exit


--------------E5D5EA556B1BAC01FAB34051--