[OpenAFS-devel] speed up namei volume operations by grouping fsyncs
Rainer Toebbicke
rtb@pclella.cern.ch
Fri, 21 Apr 2006 09:06:38 +0200
This is a multi-part message in MIME format.
--------------000402090402030105010209
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit
The attached patch does not break with the namei tradition to fsync
the link count file at every change, although the need for this is
still debatable.
Rather, it groups fsyncs to the link count file in batches, with a
final fsync prior to releasing the volume back to the file server. It
hence speeds up clone and related operations (vos backup, vos move,
salvage, etc) tremendously, most noticeable for volumes with a large
number of files (hundreds of thousands), where we observed speedups in
excess of 200...
(Bcc-ed to openafs-bugs)
--
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Rainer Toebbicke
European Laboratory for Particle Physics(CERN) - Geneva, Switzerland
Phone: +41 22 767 8985 Fax: +41 22 767 7155
--------------000402090402030105010209
Content-Type: text/plain;
name="patch_namei_speedup"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="patch_namei_speedup"
unchanged:
--- openafs/src/vol/ihandle.h.o1381 2004-08-25 09:14:19.000000000 +0200
+++ openafs/src/vol/ihandle.h 2005-05-24 15:22:17.000000000 +0200
@@ -221,6 +221,7 @@
/* Flags for the Inode handle */
#define IH_REALLY_CLOSED 1
+#define IH_DELAY_SYNC 16
/* Hash function for inode handles */
#define I_HANDLE_HASH_SIZE 1024 /* power of 2 */
@@ -466,7 +467,7 @@
#define FDH_WRITE(H, B, S) OS_WRITE((H)->fd_fd, B, S)
#define FDH_SEEK(H, O, F) OS_SEEK((H)->fd_fd, O, F)
-#define FDH_SYNC(H) OS_SYNC((H)->fd_fd)
+#define FDH_SYNC(H) ((H->fd_ih->ih_flags&IH_DELAY_SYNC) ? 0 : OS_SYNC((H)->fd_fd))
#define FDH_TRUNC(H, L) OS_TRUNC((H)->fd_fd, L)
#define FDH_SIZE(H) OS_SIZE((H)->fd_fd)
unchanged:
--- openafs/src/vol/purge.c.o1381 2004-08-25 09:14:19.000000000 +0200
+++ openafs/src/vol/purge.c 2005-05-25 11:13:16.000000000 +0200
@@ -89,7 +89,7 @@
VOL_UNLOCK;
}
-#define MAXOBLITATONCE 200
+#define MAXOBLITATONCE 1000
/* delete a portion of an index, adjusting offset appropriately. Returns 0 if
things work and we should be called again, 1 if success full and done, and -1
if an error occurred. It adjusts offset appropriately on 0 or 1 return codes,
@@ -148,10 +148,13 @@
OS_SYNC(afile->str_fd);
/* finally, do the idec's */
+ V_linkHandle(avp)->ih_flags|=IH_DELAY_SYNC; /* severe performance penalty */
for (i = 0; i < iindex; i++) {
IH_DEC(V_linkHandle(avp), inodes[i], V_parentId(avp));
DOPOLL;
}
+ V_linkHandle(avp)->ih_flags&=~IH_DELAY_SYNC;
+ IH_CONDSYNC(V_linkHandle(avp));
/* return the new offset */
*aoffset = offset;
unchanged:
--- openafs/src/vol/clone.c.o1381 2004-08-25 09:14:19.000000000 +0200
+++ openafs/src/vol/clone.c 2005-05-27 09:39:04.000000000 +0200
@@ -227,6 +227,12 @@
decRock.vol = V_parentId(rwvp);
/* Read each vnode in the old volume's index file */
+ /* fsyncing the link count file for every inode has a severe
+ performance penalty, therefore we turn it off temporarily.
+ This assumes we're the only one on that file/volume -
+ in particular when we force the fsync later!
+ */
+ V_linkHandle(rwvp)->ih_flags|=IH_DELAY_SYNC;
for (offset = vcp->diskSize;
STREAM_READ(rwvnode, vcp->diskSize, 1, rwfile) == 1;
offset += vcp->diskSize) {
@@ -345,6 +351,12 @@
* and shouldn't do the idecs.
*/
error_exit:
+ /* Now take the fsync-bypass away again and force an fsync.
+ Again: assumes we're alone on this file, otherwise we need a lock!
+ */
+ V_linkHandle(rwvp)->ih_flags&=~IH_DELAY_SYNC;
+ IH_CONDSYNC(V_linkHandle(rwvp));
+
if (rwfile)
STREAM_CLOSE(rwfile);
if (clfilein)
unchanged:
--- openafs/src/vol/namei_ops.c.o1382 2004-11-09 18:16:40.000000000 +0100
+++ openafs/src/vol/namei_ops.c 2005-06-01 16:40:28.000000000 +0200
@@ -572,6 +572,8 @@
if (p2 == -1 && p3 == VI_LINKTABLE) {
/* hack at tmp to setup for set link count call. */
+ memset((void *)&tfd, 0, sizeof(FdHandle_t)); /* minimalistic still, but a little cleaner */
+ tfd.fd_ih = &tmp;
tfd.fd_fd = fd;
code = namei_SetLinkCount(&tfd, (Inode) 0, 1, 0);
}
only in patch2:
unchanged:
--- openafs/src/volser/dumpstuff.c.1rig 2005-11-02 12:39:29.000000000 +0100
+++ openafs/src/volser/dumpstuff.c 2006-02-14 14:49:58.000000000 +0100
@@ -1033,7 +1033,13 @@
tdelo = delo;
while (1) {
- if (ReadVnodes(iodp, vp, 0, b1, s1, b2, s2, tdelo)) {
+ int temprc;
+
+ V_linkHandle(avp)->ih_flags |= IH_DELAY_SYNC; /* Avoid repetitive fdsync()s on linkfile */
+ temprc = ReadVnodes(iodp, vp, 0, b1, s1, b2, s2, tdelo);
+ V_linkHandle(avp)->ih_flags &= ~IH_DELAY_SYNC; /* normal sync behaviour again */
+ IH_CONDSYNC(V_linkHandle(avp)); /* sync link file */
+ if (temprc) {
error = VOLSERREAD_DUMPERROR;
goto clean;
}
--------------000402090402030105010209--