[OpenAFS] namei interface lockf buggy on Solaris (and probably HP-UX and AIX)

John Rudd jrudd@ucsc.edu
Mon, 11 Sep 2006 03:01:15 -0700


Hm.  Your patch seems to do this:

change:
lock/unlock from current to eof

to:
seek 0
lock/unlock from (current=0) to eof


Which means you're potentially destroying the notion of "current" (I  
don't know if that's important in AFS code or not, but it seems like,  
at best, a bad idea ... and at worst, a potentially disastrous idea).


I would suggest:

save current to tmp
seek 0
lock/unlock from (current=0) to eof
seek tmp




On Sep 11, 2006, at 1:43 AM, Rainer Toebbicke wrote:

> The namei interface uses file locking extensively, implemented using  
> lockf() on Solaris, AIX & HP-UX.
>
> Unfortunately lockf() locks and unlocks from the *current position* to  
> whatever the argument says (end of file), moving the file pointer in  
> between becomes a problem for the subsequent unlock!  The result is  
> that frequently locks aren't released, but replaced by partial locks  
> on the file data just moved over.
>
> In consequence almost-deadlocks between fileserver and volserver  
> happen (although rarely), in particular on the "link count" file.  
> Together with the salvager which locks the partition a complete  
> deadlock can be provoked.
>
> The attached patch forces lockf to lock/unlock from 0, more closely  
> emulating flock().
>
>
>
> (Bcc'ed to openafs-bugs)
>
> --  
> =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 
> =-=-=-=
> Rainer Toebbicke
> European Laboratory for Particle Physics(CERN) - Geneva, Switzerland
> Phone: +41 22 767 8985       Fax: +41 22 767 7155
> --- openafs/src/vol/namei_ops.c.xxxx	2006-09-08 12:54:04.000000000  
> +0200
> +++ openafs/src/vol/namei_ops.c	2006-09-08 12:56:07.000000000 +0200
> @@ -869,6 +869,7 @@
>
>      if (lockit) {
>  #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) ||  
> defined(AFS_HPUX_ENV)
> +	lseek(h->fd_fd, (off_t) 0, SEEK_SET);
>  	if (lockf(h->fd_fd, F_LOCK, 0) < 0)
>  #else
>  	if (flock(h->fd_fd, LOCK_EX) < 0)
> @@ -888,7 +889,7 @@
>    bad_getLinkByte:
>      if (lockit)
>  #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) ||  
> defined(AFS_HPUX_ENV)
> -	lockf(h->fd_fd, F_ULOCK, 0);
> +    { lseek(h->fd_fd, (off_t) 0, SEEK_SET); lockf(h->fd_fd, F_ULOCK,  
> 0); }
>  #else
>  	flock(h->fd_fd, LOCK_UN);
>  #endif
> @@ -913,6 +914,7 @@
>
>      /* Only one manipulates at a time. */
>  #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) ||  
> defined(AFS_HPUX_ENV)
> +    lseek(fdP->fd_fd, (off_t) 0, SEEK_SET);
>      if (lockf(fdP->fd_fd, F_LOCK, 0) < 0) {
>  #else
>      if (flock(fdP->fd_fd, LOCK_EX) < 0) {
> @@ -953,6 +955,7 @@
>      }
>      FDH_SYNC(fdP);
>  #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) ||  
> defined(AFS_HPUX_ENV)
> +    lseek(fdP->fd_fd, (off_t) 0, SEEK_SET);
>      lockf(fdP->fd_fd, F_ULOCK, 0);
>  #else
>      flock(fdP->fd_fd, LOCK_UN);
> @@ -962,6 +965,7 @@
>
>    badGetFreeTag:
>  #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) ||  
> defined(AFS_HPUX_ENV)
> +    lseek(fdP->fd_fd, (off_t) 0, SEEK_SET);
>      lockf(fdP->fd_fd, F_ULOCK, 0);
>  #else
>      flock(fdP->fd_fd, LOCK_UN);
> @@ -989,6 +993,7 @@
>
>      if (!locked) {
>  #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) ||  
> defined(AFS_HPUX_ENV)
> +	lseek(fdP->fd_fd, (off_t) 0, SEEK_SET);
>  	if (lockf(fdP->fd_fd, F_LOCK, 0) < 0) {
>  #else
>  	if (flock(fdP->fd_fd, LOCK_EX) < 0) {
> @@ -1032,6 +1037,7 @@
>
>    bad_SetLinkCount:
>  #if defined(AFS_AIX_ENV) || defined(AFS_SUN5_ENV) ||  
> defined(AFS_HPUX_ENV)
> +    lseek(fdP->fd_fd, (off_t) 0, SEEK_SET);
>      lockf(fdP->fd_fd, F_ULOCK, 0);
>  #else
>      flock(fdP->fd_fd, LOCK_UN);