[OpenAFS-devel] CheckHost patch version one

Jim Rees rees@umich.edu
Tue, 15 Nov 2005 18:41:02 -0500


Here is version one of the CheckHost patch.  It's not as complicated as it
looks.  The main change is to drop the host lock around the rpc.  The
"after" checks are minimal.  In the init-cb3 case another thread may have
marked the host down, in which case no harm done.  I do check that ALTADDR
hasn't been set in another thread, which would indicate the host may be up
at another address.  Jeffrey thinks I should actually check that the ip/port
hasn't changed, and he may be right.

In the Probe case I don't know what the races might be.

I'm still working on this.  Suggestions welcome.  I may yet give up on this
line of action and re-write the up/down logic as some have urged me to do.

--- host.c-	Thu Jul 28 16:52:21 2005
+++ host.c	Tue Nov 15 18:30:08 2005
@@ -585,7 +585,7 @@
     host->hostFlags = 0;
     host->hcps.prlist_val = NULL;
     host->hcps.prlist_len = 0;
-    host->interface = 0;
+    host->interface = NULL;
 #ifdef undef
     host->hcpsfailed = 0;	/* save cycles */
     h_gethostcps(host);		/* do this under host hold/lock */
@@ -2102,6 +2102,7 @@
 {
     register struct client *client;
     struct rx_connection *cb_conn = NULL;
+    int use_cb3;
     int code;
 
     /* Host is held by h_Enumerate */
@@ -2114,81 +2115,80 @@
     }
     if (host->LastCall < checktime) {
 	h_Lock_r(host);
-	cb_conn = host->callback_rxcon;
-	rx_GetConnection(cb_conn);
 	if (!(host->hostFlags & HOSTDELETED)) {
+	    cb_conn = host->callback_rxcon;
+	    rx_GetConnection(cb_conn);
 	    if (host->LastCall < clientdeletetime) {
+		/* Note:  it's safe to delete hosts even if they have call
+		 * back state, because break delayed callbacks (called when a
+		 * message is received from the workstation) will always send a 
+		 * break all call backs to the workstation if there is no
+		 * callback.
+		 */
 		host->hostFlags |= HOSTDELETED;
 		if (!(host->hostFlags & VENUSDOWN)) {
 		    host->hostFlags &= ~ALTADDR;	/* alternate address invalid */
-		    if (host->interface) {
-			H_UNLOCK;
-			code =
-			    RXAFSCB_InitCallBackState3(cb_conn,
-						       &FS_HostUUID);
-			H_LOCK;
-		    } else {
-			H_UNLOCK;
-			code =
-			    RXAFSCB_InitCallBackState(cb_conn);
-			H_LOCK;
-		    }
-		    host->hostFlags |= ALTADDR;	/* alternate addresses valid */
-		    if (code) {
-			char hoststr[16];
-			(void)afs_inet_ntoa_r(host->host, hoststr);
-			ViceLog(0,
-				("CB: RCallBackConnectBack (host.c) failed for host %s:%d\n",
-				 hoststr, ntohs(host->port)));
-			host->hostFlags |= VENUSDOWN;
-		    }
-		    /* Note:  it's safe to delete hosts even if they have call
-		     * back state, because break delayed callbacks (called when a
-		     * message is received from the workstation) will always send a 
-		     * break all call backs to the workstation if there is no
-		     *callback.
-		     */
-		}
-	    } else {
-		if (!(host->hostFlags & VENUSDOWN) && host->cblist) {
-		    if (host->interface) {
-			afsUUID uuid = host->interface->uuid;
-			H_UNLOCK;
-			code = RXAFSCB_ProbeUuid(cb_conn, &uuid);
-			H_LOCK;
+		    use_cb3 = (host->interface != NULL);
+		    h_Unlock_r(host);
+		    H_UNLOCK;
+		    if (use_cb3)
+			code = RXAFSCB_InitCallBackState3(cb_conn, &FS_HostUUID);
+		    else
+			code = RXAFSCB_InitCallBackState(cb_conn);
+		    H_LOCK;
+		    h_Lock_r(host);
+		    if (!(host->hostFlags & ALTADDR)) {
+			host->hostFlags |= ALTADDR; /* alternate addresses valid */
 			if (code) {
-			    if (MultiProbeAlternateAddress_r(host)) {
-				char hoststr[16];
-				(void)afs_inet_ntoa_r(host->host, hoststr);
-				ViceLog(0,
-					("ProbeUuid failed for host %s:%d\n",
-					 hoststr, ntohs(host->port)));
-				host->hostFlags |= VENUSDOWN;
-			    }
+			    char hoststr[16];
+			    (void)afs_inet_ntoa_r(host->host, hoststr);
+			    ViceLog(0,
+				    ("CB: RCallBackConnectBack (host.c) failed for host %s:%d\n",
+				     hoststr, ntohs(host->port)));
+			    host->hostFlags |= VENUSDOWN;
 			}
-		    } else {
-			H_UNLOCK;
-			code = RXAFSCB_Probe(cb_conn);
-			H_LOCK;
-			if (code) {
+		    }
+		}
+	    } else if (!(host->hostFlags & VENUSDOWN) && host->cblist) {
+		if (host->interface) {
+		    afsUUID uuid = host->interface->uuid;
+		    h_Unlock_r(host);
+		    H_UNLOCK;
+		    code = RXAFSCB_ProbeUuid(cb_conn, &uuid);
+		    H_LOCK;
+		    h_Lock_r(host);
+		    if (code) {
+			if (MultiProbeAlternateAddress_r(host)) {
 			    char hoststr[16];
 			    (void)afs_inet_ntoa_r(host->host, hoststr);
 			    ViceLog(0,
-				    ("Probe failed for host %s:%d\n", hoststr,
-				     ntohs(host->port)));
+				    ("ProbeUuid failed for host %s:%d\n",
+				     hoststr, ntohs(host->port)));
 			    host->hostFlags |= VENUSDOWN;
 			}
 		    }
+		} else {
+		    h_Unlock_r(host);
+		    H_UNLOCK;
+		    code = RXAFSCB_Probe(cb_conn);
+		    H_LOCK;
+		    h_Lock_r(host);
+		    if (code) {
+			char hoststr[16];
+			(void)afs_inet_ntoa_r(host->host, hoststr);
+			ViceLog(0,
+				("Probe failed for host %s:%d\n", hoststr,
+				 ntohs(host->port)));
+			host->hostFlags |= VENUSDOWN;
+		    }
 		}
 	    }
 	}
-	H_UNLOCK;
-	rx_PutConnection(cb_conn);
-	cb_conn=NULL;
-	H_LOCK;
 	h_Unlock_r(host);
     }
     H_UNLOCK;
+    if (cb_conn != NULL)
+	rx_PutConnection(cb_conn);
     return held;
 
 }				/*CheckHost */