[OpenAFS-devel] lots of vmalloc failures lately...

Neulinger, Nathan nneul@umr.edu
Mon, 26 Apr 2004 10:42:05 -0500


Thanks, will try that... I should note - it's not just that one place;
that just happens to be the most recent incident. I'm going to try to
add some more failure checking and asserts to any other places where I've
seen oopses recently. Maybe some of them can be handled a bit more
gracefully.

-- Nathan

------------------------------------------------------------
Nathan Neulinger                       EMail:  nneul@umr.edu
University of Missouri - Rolla         Phone: (573) 341-6679
UMR Information Technology             Fax: (573) 341-4216

> -----Original Message-----
> From: chas williams (contractor) [mailto:chas@cmf.nrl.navy.mil]
> Sent: Monday, April 26, 2004 10:29 AM
> To: Neulinger, Nathan
> Cc: openafs-devel@openafs.org
> Subject: Re: [OpenAFS-devel] lots of vmalloc failures lately...
>
> In message <5C51DC2B8353AB4BA2CD04B34F2EE79C3EFF20@umr-umail1.umr.edu>,
> "Neulinger, Nathan" writes:
> >I've been seeing a lot more vmalloc failures on a couple of my boxes
> >recently... running with a 350MB disk cache.
> >
> >Unfortunately, the result is almost always that AFS on the box becomes
> >completely unusable, and the load usually shoots through the roof over
> >the next few minutes.
>
> i have a patch you could try --
>
> Index: src/afs/afs.h
> ===================================================================
> RCS file: /cvs/openafs/src/afs/afs.h,v
> retrieving revision 1.45
> diff -u -u -r1.45 afs.h
> --- src/afs/afs.h	5 Apr 2004 22:39:51 -0000	1.45
> +++ src/afs/afs.h	16 Apr 2004 21:18:59 -0000
> @@ -926,6 +926,7 @@
>  /* kept in memory */
>  struct dcache {
>      struct afs_q lruq;		/* Free queue for in-memory images */
> +    struct afs_q dirty;		/* Queue of dirty entries that need written */
>      afs_rwlock_t lock;		/* Protects validPos, some f */
>      afs_rwlock_t tlock;		/* Atomizes updates to refCount */
>      afs_rwlock_t mflock;	/* Atomizes accesses/updates to mflags */
> Index: src/afs/afs_dcache.c
> ===================================================================
> RCS file: /cvs/openafs/src/afs/afs_dcache.c,v
> retrieving revision 1.40
> diff -u -u -r1.40 afs_dcache.c
> --- src/afs/afs_dcache.c	15 Jul 2003 23:14:12 -0000	1.40
> +++ src/afs/afs_dcache.c	16 Apr 2004 21:18:59 -0000
> @@ -2582,8 +2582,8 @@
>  {
>      register struct dcache *tdc;
>      register afs_int32 i, touchedit = 0;
> -    struct dcache **ents;
> -    int entmax, entcount;
> +
> +    struct afs_q DirtyQ, *tq;
>  
>      AFS_STATCNT(afs_WriteThroughDSlots);
>  
> @@ -2593,9 +2593,7 @@
>       * for every dcache entry, and exit xdcache.
>       */
>      MObtainWriteLock(&afs_xdcache, 283);
> -    entmax = afs_cacheFiles;
> -    ents = afs_osi_Alloc(entmax * sizeof(struct dcache *));
> -    entcount = 0;
> +    QInit(&DirtyQ);
>      for (i = 0; i < afs_cacheFiles; i++) {
>  	tdc = afs_indexTable[i];
>  
> @@ -2605,7 +2603,7 @@
>  	    tdc->refCount++;
>  	    ReleaseWriteLock(&tdc->tlock);
>  
> -	    ents[entcount++] = tdc;
> +	    QAdd(&DirtyQ, &tdc->dirty);
>  	}
>      }
>      MReleaseWriteLock(&afs_xdcache);
> @@ -2616,9 +2614,11 @@
>       * afs_cacheInodep, and flush it.  Don't forget to put back
>       * the refcounts.
>       */
> -    for (i = 0; i < entcount; i++) {
> -	tdc = ents[i];
>  
> +#define DQTODC(q)	((struct dcache *)(((char *) (q)) - sizeof(struct afs_q)))
> +
> +    for (tq = DirtyQ.prev; tq != &DirtyQ; tq = QPrev(tq)) {
> +        tdc = DQTODC(tq);
>  	if (tdc->dflags & DFEntryMod) {
>  	    int wrLock;
>  
> @@ -2638,7 +2638,6 @@
>  
>  	afs_PutDCache(tdc);
>      }
> -    afs_osi_Free(ents, entmax * sizeof(struct dcache *));
>  
>      MObtainWriteLock(&afs_xdcache, 617);
>      if (!touchedit && (cacheDiskType != AFS_FCACHE_TYPE_MEM)) {
>
>