[OpenAFS-devel] Byte-range Locking in Linux CM (Implements Nathan Neulinger Proposal,
Linux)
Matt Benjamin
matt@linuxbox.com
Tue, 05 Apr 2005 16:18:52 -0400
This is a multi-part message in MIME format.
--------------080800090103000700080305
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit
AFS Folk,
Jhutz has reviewed this patch--he at least agrees that the code likely does
what is intended, which is to implement phase 1 of Nathan Neulinger's locking
proposal: a CM implements byte-range locking locally, and shadows
such locks with whole-file locks on the AFS fileservers. Posting to dev
and bugs, per Jeff.
The change is confined to osi_vnodeops.c, except for a preprocessor define
in afs_vnop_flock.c (which would presumably get switched on by some other
mechanism eventually).
Patch is against 1.3.80.
Matt
--------------080800090103000700080305
Content-Type: text/x-patch;
name="oafs-1380-bylock-2.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="oafs-1380-bylock-2.patch"
diff -Nur openafs-1.3.80/src/afs/LINUX/osi_vnodeops.c openafs-1.3.80-bylock/src/afs/LINUX/osi_vnodeops.c
--- openafs-1.3.80/src/afs/LINUX/osi_vnodeops.c 2005-03-11 01:51:11.000000000 -0500
+++ openafs-1.3.80-bylock/src/afs/LINUX/osi_vnodeops.c 2005-03-28 18:01:24.665912360 -0500
@@ -637,11 +637,77 @@
}
+/*
+ * Compatibility wrapper around the kernel's posix_lock_file().
+ *
+ * When AFS_LINUX26_ENV is defined the kernel routine is called with
+ * (fp, flp); otherwise it is called with a third argument of 0
+ * (presumably the "wait" flag of the older interface -- confirm).
+ *
+ * Returns the value of posix_lock_file() unchanged.
+ */
+static inline int posix_lock_file_f(struct file *fp, struct file_lock *flp)
+{
+    int rc;
+
+#ifndef AFS_LINUX26_ENV
+    rc = posix_lock_file(fp, flp, 0);
+#else
+    rc = posix_lock_file(fp, flp);
+#endif
+    return rc;
+}
+
+/*
+ * Largest end offset used when a lock has to span the whole file (it is
+ * stored into fl_end by try_wholefile_wrlock()): the largest positive
+ * 64-bit value when AFS_LARGEFILE_ENV is defined, otherwise the largest
+ * positive 32-bit value.
+ *
+ * Would like to use a std define here; MAX_LFS_SIZE is not it.
+ */
+#ifdef AFS_LARGEFILE_ENV
+#define AFS_LINUX_LOCK_RANGE_MAX 0x7fffffffffffffffULL
+#else
+#define AFS_LINUX_LOCK_RANGE_MAX 0x7fffffffL
+#endif
+
+/*
+ * from fs/locks.c
+ *
+ * Iterate over the i_flock list of `inode'.  `lockp' must be an lvalue of
+ * type struct file_lock **; inside the loop body *lockp is the current
+ * lock.  Both arguments are parenthesized in the expansion so that
+ * non-trivial argument expressions parse as intended.
+ */
+#define for_each_lock(inode, lockp) \
+    for ((lockp) = &(inode)->i_flock; *(lockp) != NULL; \
+         (lockp) = &(*(lockp))->fl_next)
+
+/*
+ * Diagnostic aid: printk one line for every entry on the i_flock list of
+ * the inode behind fp, showing the entry's pid, start and end.
+ *
+ * Always returns 0.
+ */
+static inline int diag_dump_locks(struct file *fp)
+{
+    struct inode *ip = fp->f_dentry->d_inode;
+    struct file_lock *cur;
+
+    printk("diag_dump_locks:\n");
+    for (cur = ip->i_flock; cur != NULL; cur = cur->fl_next)
+        printk("Lock range found: pid/start/end [%d, %Ld, %Ld]\n",
+               cur->fl_pid,
+               (unsigned long long) cur->fl_start,
+               (unsigned long long) cur->fl_end);
+
+    return 0;
+}
+
+/*
+ * Report whether any entry on the i_flock list of the inode behind fp
+ * has fl_pid equal to pid.
+ *
+ * Returns 1 as soon as a matching entry is found, 0 if the list is
+ * exhausted without a match.
+ *
+ * NOTE(review): the list is walked without this function taking any lock
+ * itself -- confirm that callers serialize against lock-list changes.
+ */
+static inline int pid_has_lock(struct file *fp, afs_int32 pid)
+{
+    struct file_lock *cur = fp->f_dentry->d_inode->i_flock;
+
+    while (cur != NULL) {
+        if (cur->fl_pid == pid)
+            return 1;
+        cur = cur->fl_next;
+    }
+
+    return 0;
+}
+
+/*
+ * Ask posix_test_lock() whether a write lock covering the whole file
+ * would conflict with an existing lock.
+ *
+ * A scratch copy of *flp is widened to [0, AFS_LINUX_LOCK_RANGE_MAX] and
+ * given type F_WRLCK for the query; *flp itself is left untouched.
+ *
+ * Returns 1 if posix_test_lock() reports no conflicting lock, 0 if it
+ * reports one.  Also returns 0 if the scratch lock cannot be allocated:
+ * the caller in afs_linux_lock() treats a true result as permission to
+ * hand the unlock to AFS, so "conflict" is the conservative answer when
+ * the state cannot be determined.
+ */
+static inline int try_wholefile_wrlock(struct file *fp, struct file_lock *flp)
+{
+    struct file_lock *probe;
+    int no_conflict;
+
+    probe = kmalloc(sizeof(*probe), GFP_NOIO);
+    if (probe == NULL)
+        return 0;   /* previously fell through to memcpy() on a NULL pointer */
+
+    memcpy(probe, flp, sizeof(*probe));
+    probe->fl_start = 0;
+    probe->fl_end = AFS_LINUX_LOCK_RANGE_MAX;
+    probe->fl_type = F_WRLCK;
+
+    no_conflict = (posix_test_lock(fp, probe) == NULL);
+    kfree(probe);
+
+    return no_conflict;
+}
static int
afs_linux_lock(struct file *fp, int cmd, struct file_lock *flp)
{
+
int code = 0;
+ struct file_lock *glflp;
struct vcache *vcp = ITOAFS(FILE_INODE(fp));
cred_t *credp = crref();
struct AFS_FLOCK flock;
@@ -651,8 +717,8 @@
flock.l_pid = flp->fl_pid;
flock.l_whence = 0;
flock.l_start = flp->fl_start;
- flock.l_len = flp->fl_end - flp->fl_start;
-
+ flock.l_len = flp->fl_end - flp->fl_start;
+
/* Safe because there are no large files, yet */
#if defined(F_GETLK64) && (F_GETLK != F_GETLK64)
if (cmd == F_GETLK64)
@@ -661,18 +727,95 @@
cmd = F_SETLK;
else if (cmd == F_SETLKW64)
cmd = F_SETLKW;
-#endif /* F_GETLK64 && F_GETLK != F_GETLK64 */
+#endif /* F_GETLK64 && F_GETLK != F_GETLK64 */
+
+ AFS_GLOCK();
+
+ switch(cmd) {
+ case F_GETLK:
+ case F_GETLK64:
+ glflp = posix_test_lock(fp, flp);
+ if(glflp == NULL) {
+ /* no conflicting lock found */
+ flp->fl_type = F_UNLCK;
+ } else {
+ /* found potential conflict, need to update called-with structure */
+ flp->fl_pid = glflp->fl_pid;
+ flp->fl_start = glflp->fl_start;
+ flp->fl_end = glflp->fl_end;
+ flp->fl_type = glflp->fl_type;
+ goto lock_out; /* code==0 */
+ }
+ /* if there is a conflicting lock, it's on the server */
+ goto lock_delegate_afs;
+ break;
+ case F_SETLK:
+ case F_SETLK64:
+ case F_SETLKW64:
+ /* return early, if (un)expected unlock of unlocked range (eg, POSIX on-close) */
+ if((flp->fl_type == F_UNLCK) && !pid_has_lock(fp, flp->fl_pid)) {
+ goto lock_out;
+ }
+ switch(flp->fl_type) {
+ case F_RDLCK:
+ case F_WRLCK:
+ code = -(posix_lock_file_f(fp, flp));
+ switch(code) {
+ case 0:
+ /* vfs lock is granted */
+ if(vcp->flockCount != -1) {
+ /* must get afs whole-file lock (unless req. excl. and we have it already) */
+ code = afs_lockctl(vcp, &flock, cmd, credp);
+ if(-code != 0) {
+ /* server lock conflict, roll back vfs lock */
+ flp->fl_type = F_UNLCK;
+ posix_lock_file_f(fp, flp);
+ }
+ goto lock_convert_flock;
+ }
+ break;
+ break;
+ case EAGAIN:
+ /* conflicting lock */
+ break;
+ case EDEADLK:
+ break;
+ default:
+ /* posix_lock_file meltdown, miss pussycat */
+ break;
+ } /* code */
+ break;
+ case F_UNLCK:
+ /* if server byte-range locks, first check if op unlocks a write lock,
+ and flush range to server in that case */
+ code = -(posix_lock_file_f(fp, flp));
+ /* only unlock server ex lock on last unlock of range wr unlock */
+ if(try_wholefile_wrlock(fp, flp)) {
+ goto lock_delegate_afs;
+ }
+ break;
+ } /* flp->fl_type */
+ break;
+ } /* cmd */
- AFS_GLOCK();
+ goto lock_out;
+
+lock_delegate_afs:
+
+ /* delegate to AFS-- we expect byte-range locks to map to whole-file locks */
code = afs_lockctl(vcp, &flock, cmd, credp);
- AFS_GUNLOCK();
+
+lock_convert_flock:
/* Convert flock back to Linux's file_lock */
flp->fl_type = flock.l_type;
flp->fl_pid = flock.l_pid;
flp->fl_start = flock.l_start;
flp->fl_end = flock.l_start + flock.l_len;
+
+lock_out:
+ AFS_GUNLOCK();
crfree(credp);
return -code;
diff -Nur openafs-1.3.80/src/afs/VNOPS/afs_vnop_flock.c openafs-1.3.80-bylock/src/afs/VNOPS/afs_vnop_flock.c
--- openafs-1.3.80/src/afs/VNOPS/afs_vnop_flock.c 2004-12-07 01:12:13.000000000 -0500
+++ openafs-1.3.80-bylock/src/afs/VNOPS/afs_vnop_flock.c 2005-03-28 18:01:13.924545296 -0500
@@ -547,11 +547,16 @@
#endif
/* next line makes byte range locks always succeed,
* even when they should block */
+
+#define AFS_LOCAL_BYTERANGE_LOCKS 1
+#ifndef AFS_LOCAL_BYTERANGE_LOCKS
if (af->l_whence != 0 || af->l_start != 0 || af->l_len != 0) {
DoLockWarning();
afs_PutFakeStat(&fakestate);
return 0;
}
+#endif /* AFS_LOCAL_BYTERANGE_LOCKS */
+
/* otherwise we can turn this into a whole-file flock */
if (af->l_type == F_RDLCK)
code = LOCK_SH;
@@ -607,7 +612,6 @@
struct AFS_FLOCK flock;
lockIdSet(&flock, NULL, clid);
-
ObtainWriteLock(&avc->lock, 122);
if (avc->flockCount == 0) {
/*
--------------080800090103000700080305--