[OpenAFS] namei interface lockf buggy on Solaris (and probably HP-UX and AIX)

Rainer Toebbicke rtb@pclella.cern.ch
Mon, 11 Sep 2006 10:43:31 +0200


This is a multi-part message in MIME format.
--------------040500020903070901030703
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit

The namei interface uses file locking extensively, implemented using 
lockf() on Solaris, AIX & HP-UX.

Unfortunately lockf() locks and unlocks from the *current position* to 
whatever the argument says (end of file), moving the file pointer in 
between becomes a problem for the subsequent unlock!  The result is 
that frequently locks aren't released, but replaced by partial locks 
on the file data just moved over.

In consequence almost-deadlocks between fileserver and volserver 
happen (although rarely), in particular on the "link count" file. 
Together with the salvager which locks the partition a complete 
deadlock can be provoked.

The attached patch forces lockf to lock/unlock from 0, more closely 
emulating flock().



(Bcc'ed to openafs-bugs)

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Rainer Toebbicke
European Laboratory for Particle Physics(CERN) - Geneva, Switzerland
Phone: +41 22 767 8985       Fax: +41 22 767 7155

--------------040500020903070901030703
Content-Type: text/plain;
 name="patch_namei_lockf"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="patch_namei_lockf"

--- openafs/src/vol/namei_ops.c.xxxx	2006-09-08 12:54:04.000000000 +0200
+++ openafs/src/vol/namei_ops.c	2006-09-08 12:56:07.000000000 +0200
@@ -869,6 +869,7 @@
 
     if (lockit) {
 #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV)
+	lseek(h->fd_fd, (off_t) 0, SEEK_SET);
 	if (lockf(h->fd_fd, F_LOCK, 0) < 0)
 #else
 	if (flock(h->fd_fd, LOCK_EX) < 0)
@@ -888,7 +889,7 @@
   bad_getLinkByte:
     if (lockit)
 #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV)
-	lockf(h->fd_fd, F_ULOCK, 0);
+    { lseek(h->fd_fd, (off_t) 0, SEEK_SET); lockf(h->fd_fd, F_ULOCK, 0); }
 #else
 	flock(h->fd_fd, LOCK_UN);
 #endif
@@ -913,6 +914,7 @@
 
     /* Only one manipulates at a time. */
 #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV)
+    lseek(fdP->fd_fd, (off_t) 0, SEEK_SET);
     if (lockf(fdP->fd_fd, F_LOCK, 0) < 0) {
 #else
     if (flock(fdP->fd_fd, LOCK_EX) < 0) {
@@ -953,6 +955,7 @@
     }
     FDH_SYNC(fdP);
 #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV)
+    lseek(fdP->fd_fd, (off_t) 0, SEEK_SET);
     lockf(fdP->fd_fd, F_ULOCK, 0);
 #else
     flock(fdP->fd_fd, LOCK_UN);
@@ -962,6 +965,7 @@
 
   badGetFreeTag:
 #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV)
+    lseek(fdP->fd_fd, (off_t) 0, SEEK_SET);
     lockf(fdP->fd_fd, F_ULOCK, 0);
 #else
     flock(fdP->fd_fd, LOCK_UN);
@@ -989,6 +993,7 @@
 
     if (!locked) {
 #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV)
+	lseek(fdP->fd_fd, (off_t) 0, SEEK_SET);
 	if (lockf(fdP->fd_fd, F_LOCK, 0) < 0) {
 #else
 	if (flock(fdP->fd_fd, LOCK_EX) < 0) {
@@ -1032,6 +1037,7 @@
 
   bad_SetLinkCount:
 #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) || defined(AFS_HPUX_ENV)
+    lseek(fdP->fd_fd, (off_t) 0, SEEK_SET);
     lockf(fdP->fd_fd, F_ULOCK, 0);
 #else
     flock(fdP->fd_fd, LOCK_UN);

--------------040500020903070901030703--