[OpenAFS] 1.3.85 Still Crashing w/ Fedora 3 (Linux 2.6.11)

chas@cmf.nrl.navy.mil chas@cmf.nrl.navy.mil
Thu, 21 Jul 2005 16:19:16 -0400


In message <E9B01914C2BF0BC2346FCD82@devrandom.blue.cert.org>, Jason McCormick writes:
>^MFailed to invalidate all pages on inode 0xf3aff580 refcount 55 states 0x401 name <null>

well this one is pretty strange.  hopefully it's referenced by all the other
inodes.

>^MFailed to invalidate all pages on inode 0xe423fa80 refcount 1 states 0x1
>name <null>
>^MFailed to invalidate all pages on inode 0xe7598300 refcount 1 states
>0x400 name <null>
>^MFailed to invalidate all pages on inode 0xe5016a80 refcount 1 states
>0x401 name <null>

i believe i have tracked this down to a couple of problems.  mapcnt isn't
being initialized during afs_NewVCache() -- this confused linux_vma_close
at times (depending on how dirty memory was).

.execsOrWriters (and .opens for that matter) seems to be a bit strange
as well when dealing with mmap().  if you read/write mmap() a file not
on a readonly volume, then the vnode's execsOrWriters will be 2.  since
2 !< 2 (i.e. the "execsOrWriters < 2" test in the vma close path fails),
the extra open reference never gets cleaned up.  i don't really
like how i fixed this so i need to think about it some more.

i changed the inode flushing during shutdown to something a little more
aggressive in the failure cases.  this should prevent the oops.

Index: src/afs/afs_vcache.c
===================================================================
RCS file: /cvs/openafs/src/afs/afs_vcache.c,v
retrieving revision 1.90
diff -u -u -r1.90 afs_vcache.c
--- src/afs/afs_vcache.c	11 Jul 2005 18:45:49 -0000	1.90
+++ src/afs/afs_vcache.c	21 Jul 2005 20:04:43 -0000
@@ -887,6 +867,7 @@
     }
 #endif
     tvc->parentVnode = 0;
+    tvc->parentUnique = 0;
     tvc->mvid = NULL;
     tvc->linkData = NULL;
     tvc->cbExpires = 0;
@@ -914,6 +895,9 @@
     VREFCOUNT_SET(tvc, 1);	/* us */
 #endif /* AFS_XBSD_ENV */
 #endif /* AFS_OSF_ENV */
+#if defined(AFS_LINUX22_ENV)
+    tvc->mapcnt = 0;
+#endif
 #ifdef	AFS_AIX32_ENV
     LOCK_INIT(&tvc->pvmlock, "vcache pvmlock");
     tvc->vmh = tvc->segid = NULL;
Index: src/afs/LINUX/osi_vnodeops.c
===================================================================
RCS file: /cvs/openafs/src/afs/LINUX/osi_vnodeops.c,v
retrieving revision 1.110
diff -u -u -r1.110 osi_vnodeops.c
--- src/afs/LINUX/osi_vnodeops.c	13 Jul 2005 15:51:50 -0000	1.110
+++ src/afs/LINUX/osi_vnodeops.c	21 Jul 2005 20:04:43 -0000
@@ -463,9 +463,12 @@
 	if (need_unlock)
 	    ReleaseWriteLock(&vcp->lock);
 	if (!vcp->mapcnt) {
-	    if (need_unlock && vcp->execsOrWriters < 2) {
+	    if (need_unlock && vcp->execsOrWriters < 3) {
 		credp = crref();
-		(void)afs_close(vcp, vmap->vm_file->f_flags, credp);
+		if (vcp->opens == 1)	/* we are holding the last reference, so close */
+		    (void) afs_close(vcp, vmap->vm_file->f_flags, credp);
+		else
+		    vcp->opens--;
 		/* only decrement the execsOrWriters flag if this is not a
 		 * writable file. */
 		if (!(vcp->states & CRO) )
@@ -558,8 +561,8 @@
 	    vcp->opens++;
 	    vcp->states |= CMAPPED;
 	}
-	ReleaseWriteLock(&vcp->lock);
 	vcp->mapcnt++;
+	ReleaseWriteLock(&vcp->lock);
     }
 
     AFS_GUNLOCK();
@@ -599,7 +602,8 @@
     lock_kernel();
 #endif
     AFS_GLOCK();
-    code = afs_close(vcp, fp->f_flags, credp);
+    if (vcp->opens == 1)
+	code = afs_close(vcp, fp->f_flags, credp);
     AFS_GUNLOCK();
 #ifdef AFS_LINUX24_ENV
     unlock_kernel();
Index: src/afs/LINUX/osi_misc.c
===================================================================
RCS file: /cvs/openafs/src/afs/LINUX/osi_misc.c,v
retrieving revision 1.44
diff -u -u -r1.44 osi_misc.c
--- src/afs/LINUX/osi_misc.c	11 Jul 2005 18:45:51 -0000	1.44
+++ src/afs/LINUX/osi_misc.c	21 Jul 2005 20:04:43 -0000
@@ -336,16 +336,32 @@
     struct vcache *tvc, *nvc;
     extern struct vcache *afs_vhashT[VCSIZE];
 
+    ObtainWriteLock(&afs_xvcache, 535);
+
     for (i = 0; i < VCSIZE; i++) {
 	for (tvc = afs_vhashT[i]; tvc; ) {
 	    int slept;
 	
 	    nvc = tvc->hnext;
-	    if (afs_FlushVCache(tvc, &slept))		/* slept always 0 for linux? */
-		printf("Failed to invalidate all pages on inode 0x%p\n", tvc);
+	    if (afs_FlushVCache(tvc, &slept)) {		/* slept always 0 for linux? */
+		struct dentry *dp;
+
+		dp = d_find_alias(AFSTOV(tvc));
+		printk("Failed to flush vnode 0x%p refcount %d states 0x%x opens %d name %s\n",
+		       tvc, VREFCOUNT(tvc), tvc->states, tvc->opens, dp ? (char *) dp->d_name.name : "<null>");
+
+		/* try really really hard */
+		tvc->opens = 0;
+		tvc->states = 0;
+		VREFCOUNT_SET(tvc, 1);
+	        if (afs_FlushVCache(tvc, &slept))
+		    printk("retry failed again!\n");
+	    }
 	    tvc = nvc;
 	}
     }
+
+    ReleaseWriteLock(&afs_xvcache);
 }
 
 struct task_struct *rxk_ListenerTask;