[OpenAFS-devel] Re: pthreading the bosserver

chas williams - CONTRACTOR chas@cmf.nrl.navy.mil
Wed, 4 Sep 2013 14:06:26 -0400


--MP_/0T+_DKSUhNstAYeHsss=xEI
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

On Wed, 4 Sep 2013 12:58:22 -0400 (EDT)
Benjamin Kaduk <kaduk@mit.edu> wrote:

> On Fri, 9 Aug 2013, Benjamin Kaduk wrote:
> 
> > On Fri, 9 Aug 2013, Benjamin Kaduk wrote:
> >
> >> Maybe, I should just try to implement my idea and we will see if it works 
> >> or not.
> >
> > A sketch of what I had in mind is up at 
> > https://github.com/kaduk/openafs/commits/chas-bozo ; it passes minimal 
> > smoke-testing on my dev box (running a ptserver that doesn't have a prdb).
> > It does not have the extra "shutting down" flag value that jhutz mentioned, 
> > for now.
> >
> > At 72 insertions/75 deletions, it doesn't really have a "smaller code" 
> > endorsement going for it ... what do people think?
> 
> I'm assembling a changeset merging the still-useful bits of my patchset 
> with Chas's heavy lifting on the pthreading front, at 
> https://github.com/kaduk/openafs/commits/newbozo .  I changed the 
> commit-IDs for everything past the first commit (which was verified by 
> buildbot already), so as to not cause confusion on gerrit.  (I also 
> abandoned my gerrit changes for my old patchset.)
> 
> Once I have squahsed, reviewed and tested things, I plan to push it to 
> gerrit for general review.

I wrote what I think is a general fix for the problems mentioned in
this thread but haven't had much time to test it.  It is attached.  It
tries to match Jeff's wish list as much as possible.  Right now it lets
all apply operations run at the same time on a fixed list of inodes.
If you are in the middle of a restart, someone else can begin a restart
as well.  The side effect is that someone else can also run a status
as well.  If you wanted to serialize these, it would simply be changing
the newBnodes_lock to a write lock in all cases.

I am not thrilled about your code introducing what looks like C
primitives with BNODE_LOCK/BNODE_UNLOCK.  Atleast put some ()'s on the
end of the macros.  I am not sure this is entirely correct either:

    for (opr_queue_ScanSafe(&allBnodes, cursor, store )) {
        struct bnode *tb = opr_queue_Entry(cursor, struct bnode, q);

        bnode_Hold(tb);
        BNODE_UNLOCK;
        code = (*aproc) (tb, arock);
        bnode_Release(tb);
        BNODE_LOCK;

While store holds the pointer to the next bnode, you didn't hold a
reference to that bnode so it could potentially go away after
BNODE_UNLOCK.
--MP_/0T+_DKSUhNstAYeHsss=xEI
Content-Type: text/x-patch
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename=bozo.diff

diff --git a/src/bozo/bnode.c b/src/bozo/bnode.c
index 61cce27..f814d09 100644
--- a/src/bozo/bnode.c
+++ b/src/bozo/bnode.c
@@ -46,8 +46,7 @@ static PROCESS bproc_pid;	/**< pid of waker-upper */
 #endif
 struct opr_queue allBnodes;		/**< List of all bnodes */
 struct Lock allBnodes_lock;
-static struct opr_queue tempBnodes;	/**< Temporary list of all bnodes */
-static struct Lock tempBnodes_lock;
+struct Lock newBnodes_lock;		/**< Protects head of bnode list */
 static struct opr_queue allProcs;	/**< List of all processes for which we're waiting */
 struct Lock allProcs_lock;
 static struct opr_queue allTypes;	/**< List of all registered type handlers */
@@ -261,19 +260,18 @@ bnode_WaitAll(void)
     struct opr_queue *cursor, *store;
     afs_int32 code = 0;
 
-    ObtainWriteLock(&tempBnodes_lock);
-    ObtainReadLock(&allBnodes_lock);
+    ObtainReadLock(&newBnodes_lock);
 
+    ObtainReadLock(&allBnodes_lock);
     for (opr_queue_Scan(&allBnodes, cursor)) {
 	struct bnode *tb = opr_queue_Entry(cursor, struct bnode, q);
 
 	bnode_Hold(tb);
-	opr_queue_Append(&tempBnodes, &tb->tempq);
     }
     ReleaseReadLock(&allBnodes_lock);
 
-    for (opr_queue_Scan(&tempBnodes, cursor)) {
-	struct bnode *tb = opr_queue_Entry(cursor, struct bnode, tempq);
+    for (opr_queue_Scan(&allBnodes, cursor)) {
+	struct bnode *tb = opr_queue_Entry(cursor, struct bnode, q);
 
 	code = bnode_WaitStatus(tb, tb->goal);
 	if (code)
@@ -281,13 +279,12 @@ bnode_WaitAll(void)
     }
 
   out:
-    for (opr_queue_ScanSafe(&tempBnodes, cursor, store)) {
-	struct bnode *tb = opr_queue_Entry(cursor, struct bnode, tempq);
+    for (opr_queue_ScanSafe(&allBnodes, cursor, store)) {
+	struct bnode *tb = opr_queue_Entry(cursor, struct bnode, q);
 
-	opr_queue_Remove(&tb->tempq);
 	bnode_Release(tb);
     }
-    ReleaseWriteLock(&tempBnodes_lock);
+    ReleaseReadLock(&newBnodes_lock);
 
     return code;
 }
@@ -362,32 +359,30 @@ bnode_SetFileGoal(struct bnode *abnode, int agoal)
 
     if (abnode->fileGoal == agoal)
 	return 0;		/* already done */
-    ObtainWriteLock(&allBnodes_lock);
+    ObtainReadLock(&allBnodes_lock);
     abnode->fileGoal = agoal;
     WriteBozoFile(0);
-    ReleaseWriteLock(&allBnodes_lock);
+    ReleaseReadLock(&allBnodes_lock);
     return 0;
 }
 
 /* apply a function to all bnodes in the system */
 int
-bnode_ApplyInstance(int (*aproc) (struct bnode *tb, void *), void *arock)
+bnode_ApplyInstanceNoLock(int (*aproc) (struct bnode *tb, void *), void *arock)
 {
     struct opr_queue *cursor, *store;
     afs_int32 code = 0;
 
-    ObtainWriteLock(&tempBnodes_lock);
     ObtainReadLock(&allBnodes_lock);
     for (opr_queue_Scan(&allBnodes, cursor)) {
 	struct bnode *tb = opr_queue_Entry(cursor, struct bnode, q);
 
 	bnode_Hold(tb);
-	opr_queue_Append(&tempBnodes, &tb->tempq);
     }
     ReleaseReadLock(&allBnodes_lock);
 
-    for (opr_queue_Scan(&tempBnodes, cursor )) {
-	struct bnode *tb = opr_queue_Entry(cursor, struct bnode, tempq);
+    for (opr_queue_Scan(&allBnodes, cursor )) {
+	struct bnode *tb = opr_queue_Entry(cursor, struct bnode, q);
 
 	code = (*aproc) (tb, arock);
 	if (code)
@@ -395,13 +390,22 @@ bnode_ApplyInstance(int (*aproc) (struct bnode *tb, void *), void *arock)
     }
 
   out:
-    for (opr_queue_ScanSafe(&tempBnodes, cursor, store)) {
-	struct bnode *tb = opr_queue_Entry(cursor, struct bnode, tempq);
+    for (opr_queue_ScanSafe(&allBnodes, cursor, store)) {
+	struct bnode *tb = opr_queue_Entry(cursor, struct bnode, q);
 
-	opr_queue_Remove(&tb->tempq);
 	bnode_Release(tb);
     }
-    ReleaseWriteLock(&tempBnodes_lock);
+    return code;
+}
+
+int
+bnode_ApplyInstance(int (*aproc) (struct bnode *tb, void *), void *arock)
+{
+    afs_int32 code;
+
+    ObtainReadLock(&newBnodes_lock);
+    code = bnode_ApplyInstanceNoLock(aproc, arock);
+    ReleaseReadLock(&newBnodes_lock);
     return code;
 }
 
@@ -478,6 +482,7 @@ bnode_Create(char *atype, char *ainstance, struct bnode ** abp, char *ap1,
     struct stat tstat;
     afs_int32 code = 0;
 
+    ObtainWriteLock(&newBnodes_lock);
     ObtainWriteLock(&allBnodes_lock);
     if (bnode_FindInstanceNoLock(ainstance)) {
 	code = BZEXISTS;
@@ -527,12 +532,14 @@ bnode_Create(char *atype, char *ainstance, struct bnode ** abp, char *ap1,
 	WriteBozoFile(0);
 
     ReleaseWriteLock(&allBnodes_lock);
+    ReleaseWriteLock(&newBnodes_lock);
 
     bnode_SetStat(tb, tb->goal);	/* nudge it once */
     goto out;
 
   out_err:
     ReleaseWriteLock(&allBnodes_lock);
+    ReleaseWriteLock(&newBnodes_lock);
   out:
     return code;
 }
@@ -579,12 +586,13 @@ bnode_Hold(struct bnode *abnode)
 int
 bnode_Release(struct bnode *abnode)
 {
-    ObtainWriteLock(&allBnodes_lock);
     opr_Assert(abnode->refCount > 0);
     abnode->refCount--;
-    if (abnode->refCount == 0 && abnode->flags & BNODE_DELETE)
+    if (abnode->refCount == 0 && abnode->flags & BNODE_DELETE) {
+	ObtainWriteLock(&allBnodes_lock);
 	bnode_DeleteNoLock(abnode);
-    ReleaseWriteLock(&allBnodes_lock);
+	ReleaseWriteLock(&allBnodes_lock);
+    }
     return 0;
 }
 
@@ -656,7 +664,6 @@ bnode_InitBnode(struct bnode *abnode, struct bnode_ops *abnodeops,
     /* format the bnode properly */
     memset(abnode, 0, sizeof(struct bnode));
     opr_queue_Init(&abnode->q);
-    opr_queue_Init(&abnode->tempq);
 #ifdef AFS_PTHREAD_ENV
     opr_cv_init(&abnode->cv);
     opr_mutex_init(&abnode->mutex);
@@ -793,14 +800,14 @@ bproc(void *unused)
     while (1) {
 	/* first figure out how long to sleep */
 	nextTimeout = FT_ApproxTime() + MAXSLEEP;
-	ObtainReadLock(&allBnodes_lock);
+	ObtainWriteLock(&allBnodes_lock);
 	for (opr_queue_Scan(&allBnodes, cursor)) {
 	    tb = opr_queue_Entry(cursor, struct bnode, q);
 	    if (tb->flags & BNODE_NEEDTIMEOUT) {
 		nextTimeout = min(nextTimeout, tb->nextTimeout);
 	    }
 	}
-	ReleaseReadLock(&allBnodes_lock);
+	ReleaseWriteLock(&allBnodes_lock);
 	/* now nextTimeout has the time at which we should wakeup next */
 
 	/* sleep */
@@ -823,12 +830,12 @@ bproc(void *unused)
 
 	/* check all bnodes to see which ones need timeout events */
   retry:
-	ObtainReadLock(&allBnodes_lock);
+	ObtainWriteLock(&allBnodes_lock);
 	for (opr_queue_Scan(&allBnodes, cursor)) {
 	    tb = opr_queue_Entry(cursor, struct bnode, q);
 	    if ((tb->flags & BNODE_NEEDTIMEOUT) && now > tb->nextTimeout) {
 		bnode_Hold(tb);
-		ReleaseReadLock(&allBnodes_lock);
+		ReleaseWriteLock(&allBnodes_lock);
 
 		BOP_TIMEOUT(tb);
 		bnode_Check(tb);
@@ -839,7 +846,7 @@ bproc(void *unused)
 		goto retry;
 	    }
 	}
-	ReleaseReadLock(&allBnodes_lock);
+	ReleaseWriteLock(&allBnodes_lock);
 
 #ifndef AFS_PTHREAD_ENV
 	if (code < 0) {
@@ -1100,9 +1107,8 @@ bnode_Init(void)
     opr_queue_Init(&allProcs);
     Lock_Init(&allProcs_lock);
     opr_queue_Init(&allBnodes);
-    opr_queue_Init(&tempBnodes);
     Lock_Init(&allBnodes_lock);
-    Lock_Init(&tempBnodes_lock);
+    Lock_Init(&newBnodes_lock);
     memset(&bnode_stats, 0, sizeof(bnode_stats));
 
 #ifdef AFS_PTHREAD_ENV
@@ -1326,9 +1332,9 @@ bnode_NewProc(struct bnode *abnode, char *aexecString, char *coreName,
 	return code;
     opr_queue_Init(&tp->q);
     opr_Assert(abnode->refCount > 0);
-    ObtainReadLock(&allBnodes_lock);
+    ObtainWriteLock(&allBnodes_lock);	// @@@ how did we find this? should already have reference so read lock is fine?
     bnode_Hold(abnode);		/* hold a ref for duration of proc */
-    ReleaseReadLock(&allBnodes_lock);
+    ReleaseWriteLock(&allBnodes_lock);
     tp->bnode = abnode;
     tp->comLine = aexecString;
     tp->coreName = coreName;	/* may be null */
diff --git a/src/bozo/bnode_internal.h b/src/bozo/bnode_internal.h
index 69155a8..8c1151c 100644
--- a/src/bozo/bnode_internal.h
+++ b/src/bozo/bnode_internal.h
@@ -48,7 +48,6 @@ struct bnode_token {
 
 struct bnode {
     struct opr_queue q;		/* prev/next entry in top-level's list */
-    struct opr_queue tempq;	/* prev/next entry in temporary list */
     char *name;			/* instance name */
     afs_int32 nextTimeout;	/* next time this guy should be woken */
     afs_int32 period;		/* period between calls */
diff --git a/src/bozo/bosoprocs.c b/src/bozo/bosoprocs.c
index fd008d4..41842b1 100644
--- a/src/bozo/bosoprocs.c
+++ b/src/bozo/bosoprocs.c
@@ -36,6 +36,7 @@
 
 extern struct ktime bozo_nextRestartKT, bozo_nextDayKT;
 extern struct Lock allBnodes_lock;
+extern struct Lock newBnodes_lock;
 extern struct afsconf_dir *bozo_confdir;
 extern int bozo_newKTs;
 extern int DoLogging;
@@ -912,20 +913,23 @@ SBOZO_RestartAll(struct rx_call *acall)
     if (DoLogging)
 	bozo_Log("%s is executing RestartAll\n", caller);
 
+    ObtainReadLock(&newBnodes_lock);
+
     /* start shutdown of all processes */
-    code = bnode_ApplyInstance(sdproc, NULL);
+    code = bnode_ApplyInstanceNoLock(sdproc, NULL);
     if (code)
 	goto fail;
 
     /* wait for all done */
-    code = bnode_ApplyInstance(swproc, NULL);
+    code = bnode_ApplyInstanceNoLock(swproc, NULL);
     if (code)
 	goto fail;
 
     /* start them up again */
-    code = bnode_ApplyInstance(stproc, NULL);
+    code = bnode_ApplyInstanceNoLock(stproc, NULL);
 
   fail:
+    ReleaseReadLock(&newBnodes_lock);
     osi_auditU(acall, BOS_RestartAllEvent, code, AUD_END);
     return code;
 }
@@ -944,13 +948,15 @@ SBOZO_ReBozo(struct rx_call *acall)
     if (DoLogging)
 	bozo_Log("%s is executing ReBozo\n", caller);
 
+    ObtainReadLock(&newBnodes_lock);
+
     /* start shutdown of all processes */
-    code = bnode_ApplyInstance(sdproc, NULL);
+    code = bnode_ApplyInstanceNoLock(sdproc, NULL);
     if (code)
 	goto fail;
 
     /* wait for all done */
-    code = bnode_ApplyInstance(swproc, NULL);
+    code = bnode_ApplyInstanceNoLock(swproc, NULL);
     if (code)
 	goto fail;
 
@@ -1543,10 +1549,12 @@ bozo_ShutdownAndExit(void *param)
 	("Shutdown of BOS server and processes in response to signal %d\n",
 	 asignal);
 
+    ObtainReadLock(&newBnodes_lock);
+
     /* start shutdown of all processes */
-    if ((code = bnode_ApplyInstance(sdproc, NULL)) == 0) {
+    if ((code = bnode_ApplyInstanceNoLock(sdproc, NULL)) == 0) {
 	/* wait for shutdown to complete */
-	code = bnode_ApplyInstance(swproc, NULL);
+	code = bnode_ApplyInstanceNoLock(swproc, NULL);
     }
 
     if (code) {
diff --git a/src/bozo/bosprototypes.h b/src/bozo/bosprototypes.h
index 4d2132d..885383a 100644
--- a/src/bozo/bosprototypes.h
+++ b/src/bozo/bosprototypes.h
@@ -23,6 +23,7 @@ int bnode_HasCore(struct bnode *abnode);
 int bnode_WaitAll(void);
 int bnode_SetGoal(struct bnode *abnode, int agoal);
 int bnode_SetFileGoal(struct bnode *abnode, int agoal);
+int bnode_ApplyInstanceNoLock(int (*aproc)(struct bnode *, void *), void *arock);
 int bnode_ApplyInstance(int (*aproc)(struct bnode *, void *), void *arock);
 int bnode_Register(char *, struct bnode_ops *, int);
 int bnode_DeleteName(char *);

--MP_/0T+_DKSUhNstAYeHsss=xEI--