[OpenAFS-devel] lots of vmalloc failures lately...

Neulinger, Nathan nneul@umr.edu
Mon, 26 Apr 2004 11:04:36 -0500


The biggest problem here is that the alloc failure generates a log
message that gives no information about where it was called from, and it
usually doesn't oops till later in the code...

-- Nathan

------------------------------------------------------------
Nathan Neulinger                       EMail:  nneul@umr.edu
University of Missouri - Rolla         Phone: (573) 341-6679
UMR Information Technology             Fax: (573) 341-4216
=20

> -----Original Message-----
> From: openafs-devel-admin@openafs.org [mailto:openafs-devel-admin@openafs.org] On Behalf Of Neulinger, Nathan
> Sent: Monday, April 26, 2004 10:42 AM
> To: openafs-devel@openafs.org
> Subject: RE: [OpenAFS-devel] lots of vmalloc failures lately...
>=20
> Thanks, will try that... I should note - it's not just that one place,
> that just happens to be the most recent incident. I'm going to try and
> add some more failure checking and asserts to any other places that I've
> seen oops for recently. Maybe some of them can be handled a bit more
> gracefully.
>=20
> -- Nathan
>=20
> ------------------------------------------------------------
> Nathan Neulinger                       EMail:  nneul@umr.edu
> University of Missouri - Rolla         Phone: (573) 341-6679
> UMR Information Technology             Fax: (573) 341-4216
> =20
>=20
> > -----Original Message-----
> > From: chas williams (contractor) [mailto:chas@cmf.nrl.navy.mil]
> > Sent: Monday, April 26, 2004 10:29 AM
> > To: Neulinger, Nathan
> > Cc: openafs-devel@openafs.org
> > Subject: Re: [OpenAFS-devel] lots of vmalloc failures lately...
> >=20
> > In message
> > <5C51DC2B8353AB4BA2CD04B34F2EE79C3EFF20@umr-umail1.umr.edu>,
> > "Neulinger, Nathan" writes:
> > >I've been seeing a lot more vmalloc failures on a couple of my boxes
> > >recently... running with a 350MB disk cache.
> > >
> > >Unfortunately, the result is almost always that AFS on the box becomes
> > >completely unusable, and the load usually shoots through the roof over
> > >the next few minutes.
> >=20
> > i have a patch you could try --
> >=20
> > Index: src/afs/afs.h
> > ===================================================================
> > RCS file: /cvs/openafs/src/afs/afs.h,v
> > retrieving revision 1.45
> > diff -u -u -r1.45 afs.h
> > --- src/afs/afs.h	5 Apr 2004 22:39:51 -0000	1.45
> > +++ src/afs/afs.h	16 Apr 2004 21:18:59 -0000
> > @@ -926,6 +926,7 @@
> >  /* kept in memory */
> >  struct dcache {
> >      struct afs_q lruq;		/* Free queue for in-memory images */
> > +    struct afs_q dirty;		/* Queue of dirty entries that need written */
> >      afs_rwlock_t lock;		/* Protects validPos, some f */
> >      afs_rwlock_t tlock;		/* Atomizes updates to refCount */
> >      afs_rwlock_t mflock;	/* Atomizes accesses/updates to mflags */
> > Index: src/afs/afs_dcache.c
> > ===================================================================
> > RCS file: /cvs/openafs/src/afs/afs_dcache.c,v
> > retrieving revision 1.40
> > diff -u -u -r1.40 afs_dcache.c
> > --- src/afs/afs_dcache.c	15 Jul 2003 23:14:12 -0000	1.40
> > +++ src/afs/afs_dcache.c	16 Apr 2004 21:18:59 -0000
> > @@ -2582,8 +2582,8 @@
> >  {
> >      register struct dcache *tdc;
> >      register afs_int32 i, touchedit = 0;
> > -    struct dcache **ents;
> > -    int entmax, entcount;
> > +
> > +    struct afs_q DirtyQ, *tq;
> >  
> >      AFS_STATCNT(afs_WriteThroughDSlots);
> >  
> > @@ -2593,9 +2593,7 @@
> >       * for every dcache entry, and exit xdcache.
> >       */
> >      MObtainWriteLock(&afs_xdcache, 283);
> > -    entmax = afs_cacheFiles;
> > -    ents = afs_osi_Alloc(entmax * sizeof(struct dcache *));
> > -    entcount = 0;
> > +    QInit(&DirtyQ);
> >      for (i = 0; i < afs_cacheFiles; i++) {
> >  	tdc = afs_indexTable[i];
> >  
> > @@ -2605,7 +2603,7 @@
> >  	    tdc->refCount++;
> >  	    ReleaseWriteLock(&tdc->tlock);
> >  
> > -	    ents[entcount++] = tdc;
> > +	    QAdd(&DirtyQ, &tdc->dirty);
> >  	}
> >      }
> >      MReleaseWriteLock(&afs_xdcache);
> > @@ -2616,9 +2614,11 @@
> >       * afs_cacheInodep, and flush it.  Don't forget to put back
> >       * the refcounts.
> >       */
> > -    for (i = 0; i < entcount; i++) {
> > -	tdc = ents[i];
> >  
> > +#define DQTODC(q)	((struct dcache *)(((char *) (q)) - sizeof(struct afs_q)))
> > +
> > +    for (tq = DirtyQ.prev; tq != &DirtyQ; tq = QPrev(tq)) {
> > +        tdc = DQTODC(tq);
> >  	if (tdc->dflags & DFEntryMod) {
> >  	    int wrLock;
> >  
> > @@ -2638,7 +2638,6 @@
> >  
> >  	afs_PutDCache(tdc);
> >      }
> > -    afs_osi_Free(ents, entmax * sizeof(struct dcache *));
> >  
> >      MObtainWriteLock(&afs_xdcache, 617);
> >      if (!touchedit && (cacheDiskType != AFS_FCACHE_TYPE_MEM)) {
> >=20
> >=20
> _______________________________________________
> OpenAFS-devel mailing list
> OpenAFS-devel@openafs.org
> https://lists.openafs.org/mailman/listinfo/openafs-devel
>=20
>=20