[OpenAFS] Moving volume to server with existing RO replica
Frank Burkhardt
fbo2@gmx.net
Tue, 20 Apr 2004 12:15:46 +0200
--1yeeQ81UyVL57Vl7
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Hi,
On Mon, Apr 19, 2004 at 10:48:21AM -0700, Mike Fedyk wrote:
> Frank, did you get any response?
[snip]
> > I wrote a "vos convert" two years ago for MR-AFS and Thomas Mueller from
> > tu.chemnitz.de
> > ported this to OpenAFS and has based his backup concept on this feature.
> > So ask him where to get it. (It requires also changes to the volserver,
> > of course, not
> > just to the vos command).
>
> I'll ask him by private mail.
[snip]
Yes.
Thomas Mueller sent me a patch (see attached file) against OpenAFS-1.2.11 -
works like a charm. I built debian-packages containing the patch. Drop me a
mail if you need them.
Regards,
Frank
--1yeeQ81UyVL57Vl7
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="openafs-vos-convert-1.2.11-tucz.patch"
diff -Naurw openafs-vos-convert.orig/src/vol/namei_ops.c openafs-vos-convert/src/vol/namei_ops.c
--- openafs-vos-convert.orig/src/vol/namei_ops.c Wed Aug 28 05:57:37 2002
+++ openafs-vos-convert/src/vol/namei_ops.c Mon Nov 17 13:34:25 2003
@@ -42,6 +42,7 @@
#include "viceinode.h"
#include "voldefs.h"
#include "partition.h"
+#include <afs/errors.h>
extern char *volutil_PartitionName_r(int volid, char *buf, int buflen);
@@ -652,9 +653,13 @@
}
}
if (count == 0 ) {
- IHandle_t th = *ih;
- th.ih_ino = ino;
- namei_HandleToName(&name, &th);
+ IHandle_t *th;
+ IH_INIT(th, ih->ih_dev, ih->ih_vid, ino);
+ if (th->ih_refcnt > 1)
+ Log("Warning: Leaked ref on ihandle dev %d vid %d ino %lld\n",
+ th->ih_dev, th->ih_vid, (int64_t) th->ih_ino);
+ namei_HandleToName(&name, th);
+ IH_RELEASE(th);
code = unlink(name.n_path);
}
FDH_CLOSE(fdP);
@@ -1361,7 +1366,212 @@
}
return 0;
}
+/*
+ * convertVolumeInfo
+ *
+ * Read one VolumeDiskData record of a RO volume from fdr, rewrite its
+ * identity fields so that it describes the parent RW volume, and write
+ * the result to fdw.  Called by namei_ConvertROtoRWvolume().
+ *
+ * fdr - descriptor the RO volume info record is read from
+ * fdw - descriptor the converted record is written to
+ * vid - volume id; only used in log messages
+ *
+ * Returns 0 on success, -1 if the record could not be read or written
+ * completely (the failure is logged together with errno).
+ */
+
+static afs_int32 convertVolumeInfo(fdr, fdw, vid)
+    int fdr;
+    int fdw;
+    afs_uint32 vid;
+{
+    struct VolumeDiskData vd;
+    char *p;
+
+    if (read(fdr, &vd, sizeof(struct VolumeDiskData)) != sizeof(struct VolumeDiskData)) {
+        Log("1 convertVolumeInfo: read failed for %lu with code %d\n",
+            (unsigned long) vid, errno);
+        return -1;
+    }
+    vd.restoredFromId = vd.id; /* remember the RO volume here */
+    vd.cloneId = vd.id;
+    vd.id = vd.parentId;
+    vd.type = RWVOL;
+    vd.dontSalvage = 0;
+    vd.uniquifier += 5000; /* just in case there are still file copies from
+                              the old RW volume around */
+    p = strrchr(vd.name, '.');
+    if (p && !strcmp(p, ".readonly")) {
+        /*
+         * Clear the complete suffix (9 characters).  Clearing only 8
+         * would terminate the string but leave a stray 'y' behind in
+         * the name field that is written to disk.
+         */
+        bzero(p, sizeof(".readonly") - 1);
+    }
+    if (write(fdw, &vd, sizeof(struct VolumeDiskData)) != sizeof(struct VolumeDiskData)) {
+        Log("1 convertVolumeInfo: write failed for %lu with code %d\n",
+            (unsigned long) vid, errno);
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * relinkSpecialFile
+ *
+ * Helper for namei_ConvertROtoRWvolume(): stamp the special file at
+ * ropath with SetOGM(fd, parm, tag), hard-link it to rwpath and remove
+ * the old name.  "what" names the file type in log messages.
+ *
+ * The old name is removed only after link() succeeded, so a failed link
+ * never destroys the only remaining name of the file.
+ *
+ * Returns 0 on success, -1 on failure (logged).
+ */
+static int relinkSpecialFile(const char *ropath, const char *rwpath,
+                             int parm, int tag, const char *what)
+{
+    int fd;
+
+    fd = open(ropath, O_RDWR, 0);
+    if (fd < 0) {
+        Log("1 namei_ConvertROtoRWvolume: could not open %s file: %s\n",
+            what, ropath);
+        return -1;
+    }
+    SetOGM(fd, parm, tag);
+    close(fd);
+    if (link(ropath, rwpath) < 0) {
+        Log("1 namei_ConvertROtoRWvolume: could not link %s to %s, errno %d\n",
+            ropath, rwpath, errno);
+        return -1;
+    }
+    unlink(ropath);
+    return 0;
+}
+
+/*
+ * Convert a RO volume into a RW volume.
+ *
+ * This allows a very fast recovery from the loss of a partition if
+ * RO copies of all its volumes exist on another partition: those RO
+ * volumes can be turned into the new RW volumes.  Backing up the RW
+ * volumes then consists of running "vos release".
+ *
+ * The volume's directory must contain only the RO volume, which is
+ * typical for remote replicas.  If special files of any other volume
+ * (apart from the link table) are found, VVOLEXISTS is returned and
+ * nothing is changed.
+ *
+ * In that situation the link table is already correct, the vnode
+ * index files only need to be renamed, and the volinfo file is replaced
+ * by a new one with slightly different contents and a new name.
+ * The old RO volinfo file is removed by this call.  Per the original
+ * author, the caller must create a new /vicep<x> header file for the
+ * RW volume after return from this routine.
+ *
+ * h   - handle whose ih_dev/ih_vid identify the volume group; ih_vid is
+ *       used as the id of the resulting RW volume
+ * vid - id of the RO volume to convert
+ *
+ * Returns 0 on success, EIO if the directory cannot be determined or
+ * opened, VVOLEXISTS as described above, and -1 on any other failure.
+ * A failure after the new volinfo file has been created leaves the
+ * volume partially converted.
+ */
+int namei_ConvertROtoRWvolume(IHandle_t * h, afs_uint32 vid)
+{
+    namei_t n;
+    char dir_name[512], oldpath[512], ropath[512];
+    char smallName[64];
+    char largeName[64];
+    char infoName[64];
+    IHandle_t t_ih;
+    char infoSeen = 0;
+    char smallSeen = 0;
+    char largeSeen = 0;
+    char linkSeen = 0;
+    int code, fd, fd2;
+    char *p;
+    DIR *dirp;
+    struct dirent *dp;
+    struct ViceInodeInfo info;
+
+    /* The special files live in the directory holding the link table. */
+    namei_HandleToName(&n, h);
+    strcpy(dir_name, n.n_path);
+    p = strrchr(dir_name, '/');
+    if (!p) {
+        Log("1 namei_ConvertROtoRWvolume: no directory component in %s\n",
+            dir_name);
+        return EIO;
+    }
+    *p = 0;
+    dirp = opendir(dir_name);
+    if (!dirp) {
+        Log("1 namei_ConvertROtoRWvolume: Could not opendir(%s)\n",
+            dir_name);
+        return EIO;
+    }
+
+    /*
+     * Pass 1: make sure the directory holds exactly the special files
+     * of the RO volume plus the link table, and remember their names.
+     */
+    while ((dp = readdir(dirp)) != NULL) {
+        if (*dp->d_name == '.') continue;
+        if (DecodeInode(dir_name, dp->d_name, &info, h->ih_vid)<0) {
+            Log("1 namei_ConvertROtoRWvolume: DecodeInode failed for %s/%s\n",
+                dir_name, dp->d_name);
+            closedir(dirp);
+            return -1;
+        }
+        if (info.u.param[1] != -1) {
+            Log("1 namei_ConvertROtoRWvolume: found other than volume special file %s/%s\n",
+                dir_name, dp->d_name);
+            closedir(dirp);
+            return -1;
+        }
+        if (info.u.param[0] != vid) {
+            if (info.u.param[0] == h->ih_vid) {
+                if (info.u.param[2] == VI_LINKTABLE) { /* link table */
+                    linkSeen = 1;
+                    continue;
+                }
+            }
+            Log("1 namei_ConvertROtoRWvolume: found special file %s/%s for volume %lu\n",
+                dir_name, dp->d_name, info.u.param[0]);
+            closedir(dirp);
+            return VVOLEXISTS;
+        }
+        if (info.u.param[2] == VI_VOLINFO) { /* volume info file */
+            strcpy(infoName, dp->d_name);
+            infoSeen = 1;
+        }
+        else if (info.u.param[2] == VI_SMALLINDEX) { /* small vnodes file */
+            strcpy(smallName, dp->d_name);
+            smallSeen = 1;
+        }
+        else if (info.u.param[2] == VI_LARGEINDEX) { /* large vnodes file */
+            strcpy(largeName, dp->d_name);
+            largeSeen = 1;
+        }
+        else {
+            closedir(dirp);
+            Log("1 namei_ConvertROtoRWvolume: unknown type %d of special file found : %s/%s\n",
+                info.u.param[2], dir_name, dp->d_name);
+            return -1;
+        }
+    }
+    closedir(dirp);
+
+    if (!infoSeen || !smallSeen || !largeSeen || !linkSeen) {
+        Log("1 namei_ConvertROtoRWvolume: not all special files found in %s\n",
+            dir_name);
+        return -1;
+    }
+
+    /*
+     * If we come here then there was only a RO-volume and we can safely
+     * proceed.
+     */
+
+    bzero(&t_ih, sizeof(t_ih));
+    t_ih.ih_dev = h->ih_dev;
+    t_ih.ih_vid = h->ih_vid;
+
+    /* Pass 2a: write a converted copy of the volinfo file under the RW name. */
+    sprintf(oldpath, "%s/%s", dir_name, infoName);
+    fd = open(oldpath, O_RDWR, 0);
+    if (fd < 0) {
+        Log("1 namei_ConvertROtoRWvolume: could not open RO info file: %s\n",
+            oldpath);
+        return -1;
+    }
+    t_ih.ih_ino = namei_MakeSpecIno(h->ih_vid, VI_VOLINFO);
+    namei_HandleToName(&n, &t_ih);
+    fd2 = open(n.n_path, O_CREAT|O_EXCL|O_TRUNC|O_RDWR, 0);
+    if (fd2 < 0) {
+        Log("1 namei_ConvertROtoRWvolume: could not create RW info file: %s\n",
+            n.n_path);
+        close(fd);
+        return -1;
+    }
+    code = convertVolumeInfo(fd, fd2, h->ih_vid);
+    close(fd);
+    if (code) {
+        close(fd2);
+        unlink(n.n_path);
+        return -1;
+    }
+    SetOGM(fd2, h->ih_vid, 1);
+    close(fd2);
+
+    /* Pass 2b: move the two vnode index files to their RW names. */
+    t_ih.ih_ino = namei_MakeSpecIno(h->ih_vid, VI_SMALLINDEX);
+    namei_HandleToName(&n, &t_ih);
+    sprintf(ropath, "%s/%s", dir_name, smallName);
+    if (relinkSpecialFile(ropath, n.n_path, h->ih_vid, 2, "SmallIndex") < 0)
+        return -1;
+
+    t_ih.ih_ino = namei_MakeSpecIno(h->ih_vid, VI_LARGEINDEX);
+    namei_HandleToName(&n, &t_ih);
+    sprintf(ropath, "%s/%s", dir_name, largeName);
+    if (relinkSpecialFile(ropath, n.n_path, h->ih_vid, 3, "LargeIndex") < 0)
+        return -1;
+
+    /* Everything else is in place; the RO volinfo file can go now. */
+    unlink(oldpath);
+    return 0;
+}
/* PrintInode
*
diff -Naurw openafs-vos-convert.orig/src/volser/volint.xg openafs-vos-convert/src/volser/volint.xg
--- openafs-vos-convert.orig/src/volser/volint.xg Sat Nov 4 11:06:33 2000
+++ openafs-vos-convert/src/volser/volint.xg Tue Aug 12 14:05:48 2003
@@ -47,6 +47,7 @@
#define VOLSETINFO 126
#define VOLXLISTPARTITIONS 127
#define VOLFORWARDMULTIPLE 128
+#define VOLCONVERTRO 65536
const SIZE = 1024;
@@ -390,3 +391,8 @@
IN struct restoreCookie *cookie,
OUT manyResults *results
) = VOLFORWARDMULTIPLE;
+
+/*
+ * Ask the volserver to convert the RO volume volid on partition partid
+ * into a RW volume.
+ * NOTE(review): opcode VOLCONVERTRO (65536) lies far outside the range
+ * of the neighbouring opcodes (126-128); presumably chosen to avoid
+ * clashing with stock opcodes - confirm.
+ */
+proc ConvertROtoRWvolume(
+ IN afs_int32 partid,
+ IN afs_int32 volid
+) = VOLCONVERTRO;
diff -Naurw openafs-vos-convert.orig/src/volser/volprocs.c openafs-vos-convert/src/volser/volprocs.c
--- openafs-vos-convert.orig/src/volser/volprocs.c Thu May 15 16:11:36 2003
+++ openafs-vos-convert/src/volser/volprocs.c Mon Nov 17 13:36:30 2003
@@ -2681,6 +2682,114 @@
return error;
}
+#ifdef AFS_NAMEI_ENV
+/*
+ * Inode number format (from namei_ops.c):
+ * low 26 bits - vnode number - all 1's if volume special file.
+ * next 3 bits - tag
+ * next 3 bits spare (0's)
+ * high 32 bits - uniquifier (regular) or type if spare
+ *
+ * NOTE(review): "type if spare" presumably means "type if special
+ * file" - confirm against namei_ops.c.  The masks and shifts below are
+ * a local copy of that layout and have to stay in step with it.
+ */
+#define NAMEI_VNODEMASK 0x003ffffff
+#define NAMEI_TAGMASK 0x7
+#define NAMEI_TAGSHIFT 26
+#define NAMEI_UNIQMASK 0xffffffff
+#define NAMEI_UNIQSHIFT 32
+#define NAMEI_INODESPECIAL ((Inode)NAMEI_VNODEMASK)
+#define NAMEI_VNODESPECIAL NAMEI_VNODEMASK
+#endif /* AFS_NAMEI_ENV */
+
+/*
+ * SAFSVolConvertROtoRWvolume
+ *
+ * Server side of the ConvertROtoRWvolume RPC: turn the RO volume
+ * volumeId on partition partId into a RW volume carrying the id of its
+ * parent.  Only available when built with AFS_NAMEI_ENV; otherwise
+ * EINVAL is returned.
+ *
+ * acid     - the rx call; the caller must be a super user
+ * partId   - id of the partition holding the RO volume
+ * volumeId - id of the RO volume
+ *
+ * Steps: locate and read the RO volume header, notify the fileserver,
+ * convert the special files via namei_ConvertROtoRWvolume(), write a new
+ * header file named after the parent id, remove the RO header and hand
+ * the new volume to the fileserver.
+ *
+ * Returns 0 on success, VOLSERBAD_ACCESS, VOLSERILLEGAL_PARTITION,
+ * ENOENT, EIO, or the error from namei_ConvertROtoRWvolume().
+ */
+afs_int32 SAFSVolConvertROtoRWvolume(acid, partId, volumeId)
+    struct rx_call *acid;
+    afs_int32 partId;
+    afs_int32 volumeId;
+{
+#ifdef AFS_NAMEI_ENV
+    DIR *dirp;
+    char pname[16];
+    char volname[20];
+    afs_int32 error = 0;
+    afs_int32 volid;
+    int found = 0;
+    char caller[MAXKTCNAMELEN];
+    char headername[16];
+    char opath[256];
+    char npath[256];
+    struct VolumeDiskHeader h;
+    int fd;
+    IHandle_t *ih;
+    Inode ino;
+    struct DiskPartition *dp;
+
+    if (!afsconf_SuperUser(tdir, acid, caller))
+        return VOLSERBAD_ACCESS;        /* not a super user */
+    if (GetPartName(partId, pname))
+        return VOLSERILLEGAL_PARTITION;
+    dirp = opendir(pname);
+    if (dirp == NULL)
+        return VOLSERILLEGAL_PARTITION;
+    strcpy(volname, "");
+
+    /* Scan the partition until the requested volume or "EOD" is seen. */
+    while (strcmp(volname, "EOD") && !found) {
+        GetNextVol(dirp, volname, &volid);
+        if (strcmp(volname, "")) {      /* its a volume */
+            if (volid == volumeId)
+                found = 1;
+        }
+    }
+    closedir(dirp);
+    if (!found)
+        return ENOENT;
+
+    /* Read the RO header; it is the template for the RW header. */
+    sprintf(headername, VFORMAT, volumeId);
+    sprintf(opath, "%s/%s", pname, headername);
+    fd = open(opath, O_RDONLY);
+    if (fd < 0) {
+        Log("1 SAFS_VolConvertROtoRWvolume: Couldn't open header for RO-volume %lu.\n",
+            (unsigned long) volumeId);
+        return ENOENT;
+    }
+    if (read(fd, &h, sizeof(h)) != sizeof(h)) {
+        Log("1 SAFS_VolConvertROtoRWvolume: Couldn't read header for RO-volume %lu.\n",
+            (unsigned long) volumeId);
+        close(fd);
+        return EIO;
+    }
+    close(fd);
+    FSYNC_askfs(volumeId, pname, FSYNC_RESTOREVOLUME, 0);
+
+    for (dp = DiskPartitionList; dp && strcmp(dp->name, pname); dp = dp->next)
+        ;
+    if (!dp) {
+        Log("1 SAFS_VolConvertROtoRWvolume: Couldn't find DiskPartition for %s\n", pname);
+        return EIO;
+    }
+    ino = namei_MakeSpecIno(h.parent, VI_LINKTABLE);
+    IH_INIT(ih, dp->device, h.parent, ino);
+
+    error = namei_ConvertROtoRWvolume(ih, volumeId);
+    /* The handle is only needed for the conversion; drop our reference
+     * on both the success and the error path. */
+    IH_RELEASE(ih);
+    if (error)
+        return error;
+
+    /* Write the RW header under the parent id. */
+    h.id = h.parent;
+    h.volumeInfo_hi = h.id;
+    h.smallVnodeIndex_hi = h.id;
+    h.largeVnodeIndex_hi = h.id;
+    h.linkTable_hi = h.id;
+    sprintf(headername, VFORMAT, h.id);
+    sprintf(npath, "%s/%s", pname, headername);
+    fd = open(npath, O_CREAT | O_EXCL | O_RDWR, 0644);
+    if (fd < 0) {
+        Log("1 SAFS_VolConvertROtoRWvolume: Couldn't create header for RW-volume %lu.\n",
+            (unsigned long) h.id);
+        return EIO;
+    }
+    if (write(fd, &h, sizeof(h)) != sizeof(h)) {
+        Log("1 SAFS_VolConvertROtoRWvolume: Couldn't write header for RW-volume %lu.\n",
+            (unsigned long) h.id);
+        close(fd);
+        return EIO;
+    }
+    close(fd);
+    if (unlink(opath) < 0) {
+        /* Report the reason unlink() failed; "error" is always 0 here. */
+        Log("1 SAFS_VolConvertROtoRWvolume: Couldn't unlink RO header, error = %d\n", errno);
+    }
+    FSYNC_askfs(volumeId, pname, FSYNC_DONE, 0);
+    FSYNC_askfs(h.id, pname, FSYNC_ON, 0);
+    return 0;
+#else /* AFS_NAMEI_ENV */
+    return EINVAL;
+#endif /* AFS_NAMEI_ENV */
+}
+
/* GetPartName - map partid (a decimal number) into pname (a string)
* Since for NT we actually want to return the drive name, we map through the
* partition struct.
diff -Naurw openafs-vos-convert.orig/src/volser/vos.c openafs-vos-convert/src/volser/vos.c
--- openafs-vos-convert.orig/src/volser/vos.c Thu Nov 14 22:40:43 2002
+++ openafs-vos-convert/src/volser/vos.c Mon May 12 15:08:10 2003
@@ -10,7 +10,7 @@
#include <afsconfig.h>
#include <afs/param.h>
-RCSID("$Header: /cvs/openafs/src/volser/vos.c,v 1.8.2.6 2002/11/14 21:40:43 zacheiss Exp $");
+RCSID("$Header: /cvs/openafs/src/volser/vos.c,v 1.8.2.5 2002/08/05 22:08:02 shadow Exp $");
#include <sys/types.h>
#ifdef AFS_NT40_ENV
@@ -3988,6 +3988,147 @@
return 0;
}
+/*
+ * ConvertRO - implementation of "vos convertROtoRW".
+ *
+ * Parameters taken from the command syntax:
+ *   parms[0]  -server     machine holding the RO volume
+ *   parms[1]  -partition  partition holding the RO volume
+ *   parms[2]  -id         volume name or id (RW, RO or BK id accepted)
+ *   parms[3]  -force      do not ask before replacing an existing RW site
+ *
+ * Asks the volserver to convert the RO volume into a RW volume and then
+ * rewrites the VLDB entry: the RO site becomes the RW site and a
+ * previously registered RW site is dropped.
+ *
+ * Returns 0 on success, ENOENT for bad server/partition arguments, the
+ * VLDB error if the entry cannot be fetched or replaced, and -1 on any
+ * other failure or when the user declines.
+ */
+static int ConvertRO(as)
+    register struct cmd_syndesc *as;
+{
+    afs_int32 partition = -1;
+    afs_int32 server, volid, code, i, same;
+    struct nvldbentry entry, storeEntry;
+    afs_int32 vcode;
+    afs_int32 rwindex = -1;
+    afs_int32 rwserver = 0;
+    afs_int32 rwpartition = 0;
+    afs_int32 roindex = -1;     /* stays -1 when no RO site matches */
+    afs_int32 roserver = 0;
+    afs_int32 ropartition = 0;
+    int force = 0;
+    struct rx_connection *aconn;
+    int c, dc;                  /* int so that EOF from getchar() is representable */
+
+    server = GetServer(as->parms[0].items->data);
+    if (!server) {
+        fprintf(STDERR,"vos: host '%s' not found in host table\n",as->parms[0].items->data );
+        return ENOENT;
+    }
+    partition = volutil_GetPartitionID(as->parms[1].items->data);
+    if (partition < 0) {
+        fprintf(STDERR,"vos: could not interpret partition name '%s'\n",as->parms[1].items->data );
+        return ENOENT;
+    }
+    if (!IsPartValid(partition, server, &code)) {
+        if(code) PrintError("",code);
+        else fprintf(STDERR,"vos : partition %s does not exist on the server\n",as->parms[1].items->data);
+        return ENOENT;
+    }
+    volid = vsu_GetVolumeID(as->parms[2].items->data, cstruct, &code);
+    if (volid == 0) {
+        if (code) PrintError("", code);
+        /* report the volume argument (parms[2]), not the server name */
+        else fprintf(STDERR, "Unknown volume ID or name '%s'\n", as->parms[2].items->data);
+        return -1;
+    }
+    if (as->parms[3].items)
+        force = 1;
+
+    vcode = VLDB_GetEntryByID (volid, -1, &entry);
+    if(vcode) {
+        fprintf(STDERR,"Could not fetch the entry for volume %u from VLDB\n",
+                volid);
+        PrintError("convertROtoRW", vcode);
+        return vcode;
+    }
+
+    /* use RO volid even if user specified RW or BK volid */
+    volid = entry.volumeId[ROVOL];
+
+    /* Find a registered RW site and the RO site on the given server. */
+    MapHostToNetwork(&entry);
+    for (i=0; i< entry.nServers; i++) {
+        if (entry.serverFlags[i] & ITSRWVOL) {
+            rwindex = i;
+            rwserver = entry.serverNumber[i];
+            rwpartition = entry.serverPartition[i];
+        }
+        if (entry.serverFlags[i] & ITSROVOL) {
+            same = VLDB_IsSameAddrs(server, entry.serverNumber[i], &code);
+            if (code) {
+                fprintf(STDERR, "Failed to get info about server's %d address(es) from vlserver (err=%d); aborting call!\n",
+                        server, code);
+                return ENOENT;
+            }
+            if (same) {
+                roindex = i;
+                roserver = entry.serverNumber[i];
+                ropartition = entry.serverPartition[i];
+                break;
+            }
+        }
+    }
+    if (!roserver) {
+        fprintf(STDERR, "Warning: RO volume didn't exist in vldb!\n");
+    }
+    /* ropartition is only meaningful when a RO site was found */
+    if (roserver && ropartition != partition) {
+        fprintf(STDERR, "Warning: RO volume should be in partition %d instead of %d (vldb)\n", ropartition, partition);
+    }
+
+    if (rwserver) {
+        fprintf(STDERR,
+                "VLDB indicates that a RW volume exists already on %s in partition %s.\n", hostutil_GetNameByINet(rwserver), volutil_PartitionName(rwpartition));
+        if (!force) {
+            fprintf(STDERR, "Overwrite this VLDB entry? [y|n] (n)\n");
+            dc = c = getchar();
+            while (!(dc==EOF || dc=='\n')) dc=getchar(); /* goto end of line */
+            if ((c != 'y') && (c != 'Y')) {
+                fprintf(STDERR, "aborted.\n");
+                return -1;
+            }
+        }
+    }
+
+    vcode = ubik_Call(VL_SetLock, cstruct, 0, entry.volumeId[RWVOL], RWVOL,
+                      VLOP_MOVE);
+    if (vcode) {
+        /* do not touch the volume without holding the VLDB lock */
+        fprintf(STDERR, "Unable to lock volume %lu, code %d\n",
+                (unsigned long) entry.volumeId[RWVOL], vcode);
+        PrintError("", vcode);
+        return -1;
+    }
+    aconn = UV_Bind(server, AFSCONF_VOLUMEPORT);
+    code = AFSVolConvertROtoRWvolume(aconn, partition, volid);
+    if (code) {
+        fprintf(STDERR,"Converting RO volume %u to RW volume failed with code %d\n", volid, code);
+        PrintError("convertROtoRW ", code);
+        code = -1;
+        goto unlock;
+    }
+    if (roindex < 0) {
+        /* No site slot to relabel; indexing serverFlags[] here would be
+         * out of bounds. */
+        fprintf(STDERR, "Warning: volume converted, but vldb entry has no matching RO site; vldb not updated!\n");
+        code = -1;
+        goto unlock;
+    }
+
+    /* The RO site becomes the RW site; drop a previous RW site by
+     * moving the last site into its slot. */
+    entry.serverFlags[roindex] = ITSRWVOL;
+    entry.flags |= RW_EXISTS;
+    entry.flags &= ~BACK_EXISTS;
+    if (rwserver) {
+        (entry.nServers)--;
+        if (rwindex != entry.nServers) {
+            entry.serverNumber[rwindex] = entry.serverNumber[entry.nServers];
+            entry.serverPartition[rwindex] = entry.serverPartition[entry.nServers];
+            entry.serverFlags[rwindex] = entry.serverFlags[entry.nServers];
+            entry.serverNumber[entry.nServers] = 0;
+            entry.serverPartition[entry.nServers] = 0;
+            entry.serverFlags[entry.nServers] = 0;
+        }
+    }
+    /* Recompute RO_EXISTS from the remaining usable RO sites. */
+    entry.flags &= ~RO_EXISTS;
+    for (i=0; i<entry.nServers; i++) {
+        if (entry.serverFlags[i] & ITSROVOL) {
+            if (!(entry.serverFlags[i] & (RO_DONTUSE | NEW_REPSITE)))
+                entry.flags |= RO_EXISTS;
+        }
+    }
+    MapNetworkToHost(&entry, &storeEntry);
+    code = VLDB_ReplaceEntry(entry.volumeId[RWVOL], RWVOL, &storeEntry,
+                             (LOCKREL_OPCODE | LOCKREL_AFSID | LOCKREL_TIMESTAMP));
+    if (code) {
+        fprintf(STDERR, "Warning: volume converted, but vldb update failed with code %d!\n", code);
+    }
+
+  unlock:
+    /* Reached on success and on every failure after the lock was taken. */
+    vcode = UV_LockRelease(entry.volumeId[RWVOL]);
+    if (vcode) {
+        PrintDiagnostics("unlock", vcode);
+    }
+    return code;
+}
+
PrintDiagnostics(astring, acode)
char *astring;
afs_int32 acode;
@@ -4279,6 +4420,13 @@
cmd_AddParm(ts, "-printuuid", CMD_FLAG, CMD_OPTIONAL, "print uuid of hosts");
COMMONPARMS;
+ ts = cmd_CreateSyntax("convertROtoRW", ConvertRO, 0, "convert a RO volume into a RW volume (after loss of old RW volume)");
+ cmd_AddParm(ts, "-server", CMD_SINGLE,0, "machine name");
+ cmd_AddParm(ts, "-partition", CMD_SINGLE,0, "partition name");
+ cmd_AddParm(ts, "-id", CMD_SINGLE, 0, "volume name or ID");
+ cmd_AddParm(ts, "-force", CMD_FLAG, CMD_OPTIONAL, "don't ask");
+ COMMONPARMS;
+
code = cmd_Dispatch(argc, argv);
if (rxInitDone) {
/* Shut down the ubik_client and rx connections */
--1yeeQ81UyVL57Vl7--