[OpenAFS-devel] Re: [Patch] Support OpenVZ virtualization scheme for openafs-1.4.8 kernel module

Night Nord NightNord@gmail.com
Wed, 25 Feb 2009 22:40:16 +0300


Sorry, I didn't know. openafs-bugs is BCC'd (the introduction is in the
quoted message below).

Here is the patch itself:

------------------------------------------ [ cut here ] ------------------------------------------

diff --git a/acinclude.m4 b/acinclude.m4
index 4020544..97893a0 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -773,6 +773,7 @@ case $AFS_SYSNAME in *_linux* | *_umlinux*)
                  LINUX_EXPORTS_SYS_OPEN
                  LINUX_EXPORTS_SYS_WAIT4
 		 LINUX_EXPORTS_RCU_READ_LOCK
+		 LINUX_OPENVZ
 		 LINUX_WHICH_MODULES
                  if test "x$ac_cv_linux_config_modversions" =3D "xno"
-o $AFS_SYSKVERS -ge 26; then
                    AC_MSG_WARN([Cannot determine sys_call_table
status. assuming it isn't exported])
diff --git a/src/afs/LINUX/osi_groups.c b/src/afs/LINUX/osi_groups.c
index 612d058..b039c18 100644
--- a/src/afs/LINUX/osi_groups.c
+++ b/src/afs/LINUX/osi_groups.c
@@ -646,7 +646,10 @@ void osi_keyring_init(void)
 #  endif
 	    rcu_read_lock();
 # endif
-#if defined(EXPORTED_FIND_TASK_BY_PID)
+
+#if defined(HAVE_OPENVZ)
+	p = find_task_by_pid_all(1);
+#elif defined(EXPORTED_FIND_TASK_BY_PID)
 	p = find_task_by_pid(1);
 #else
 	p = find_task_by_vpid(1);
diff --git a/src/afs/afs_osi.c b/src/afs/afs_osi.c
index 0ebb913..b9a00ff 100644
--- a/src/afs/afs_osi.c
+++ b/src/afs/afs_osi.c
@@ -853,19 +853,13 @@ afs_osi_TraverseProcTable()
 	rcu_read_lock();
 #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) */

-#ifdef DEFINED_FOR_EACH_PROCESS
+#if defined(HAVE_OPENVZ)
+    for_each_process_all(p) if (p->pid) {
+#elif defined(DEFINED_FOR_EACH_PROCESS)
     for_each_process(p) if (p->pid) {
-#ifdef STRUCT_TASK_STRUCT_HAS_EXIT_STATE
-	if (p->exit_state)
-	    continue;
-#else
-	if (p->state & TASK_ZOMBIE)
-	    continue;
-#endif
-	afs_GCPAGs_perproc_func(p);
-    }
 #else
     for_each_task(p) if (p->pid) {
+#endif
 #ifdef STRUCT_TASK_STRUCT_HAS_EXIT_STATE
 	if (p->exit_state)
 	    continue;
@@ -875,7 +869,6 @@ afs_osi_TraverseProcTable()
 #endif
 	afs_GCPAGs_perproc_func(p);
     }
-#endif
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) && defined(EXPORTED_TASKLIST_LOCK)
     if (&tasklist_lock)
 	read_unlock(&tasklist_lock);
diff --git a/src/cf/linux-test4.m4 b/src/cf/linux-test4.m4
index 801f6d5..4fdacad 100644
--- a/src/cf/linux-test4.m4
+++ b/src/cf/linux-test4.m4
@@ -1057,3 +1057,19 @@ AC_DEFUN([LINUX_HAVE_WRITE_BEGIN_AOP], [
   if test "x$ac_cv_linux_write_begin" =3D "xyes"; then
     AC_DEFINE([HAVE_WRITE_BEGIN], 1, [define if your kernel has a
write_begin() address space op])
   fi])
+
+AC_DEFUN([LINUX_OPENVZ], [
+  AC_MSG_CHECKING([for linux OpenVZ project extensions])
+  AC_CACHE_VAL([ac_cv_linux_openvz], [
+    AC_TRY_KBUILD(
+[#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/capability.h>
+#include <linux/ve.h>],
+[get_ve(NULL);],
+      ac_cv_linux_openvz=yes,
+      ac_cv_linux_openvz=no)])
+  AC_MSG_RESULT($ac_cv_linux_openvz)
+  if test "x$ac_cv_linux_openvz" =3D "xyes"; then
+    AC_DEFINE([HAVE_OPENVZ], 1, [define if your kernel have
configured OpenVZ extensions])
+  fi])
diff --git a/src/rx/LINUX/rx_knet.c b/src/rx/LINUX/rx_knet.c
index 59f982e..05c8533 100644
--- a/src/rx/LINUX/rx_knet.c
+++ b/src/rx/LINUX/rx_knet.c
@@ -102,6 +102,10 @@ osi_NetSend(osi_socket sop, struct sockaddr_in *to, struct iovec *iovec,
     struct msghdr msg;
     int code;

+#ifdef HAVE_OPENVZ
+    struct ve_struct *ve;
+#endif
+
     msg.msg_iovlen = iovcnt;
     msg.msg_iov = iovec;
     msg.msg_name = to;
@@ -110,9 +114,15 @@ osi_NetSend(osi_socket sop, struct sockaddr_in *to, struct iovec *iovec,
     msg.msg_controllen = 0;
     msg.msg_flags = 0;

+#ifdef HAVE_OPENVZ
+    ve = set_exec_env(get_ve0());
+#endif
     TO_USER_SPACE();
     code = sock_sendmsg(sop, &msg, size);
     TO_KERNEL_SPACE();
+#ifdef HAVE_OPENVZ
+    (void)set_exec_env(ve);
+#endif
     return (code < 0) ? code : 0;
 }

@@ -147,6 +157,9 @@ osi_NetReceive(osi_socket so, struct sockaddr_in *from, struct iovec *iov,
     int code;
     struct iovec tmpvec[RX_MAXWVECS + 2];
     struct socket *sop = (struct socket *)so;
+#ifdef HAVE_OPENVZ
+    struct ve_struct *ve;
+#endif

     if (iovcnt > RX_MAXWVECS + 2) {
 	osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", iovcnt);
@@ -159,9 +172,15 @@ osi_NetReceive(osi_socket so, struct sockaddr_in *from, struct iovec *iov,
     msg.msg_controllen = 0;
     msg.msg_flags = 0;

+#ifdef HAVE_OPENVZ
+    ve = set_exec_env(get_ve0());
+#endif
     TO_USER_SPACE();
     code = sock_recvmsg(sop, &msg, *lengthp, 0);
     TO_KERNEL_SPACE();
+#ifdef HAVE_OPENVZ
+    (void)set_exec_env(ve);
+#endif

     if (code < 0) {
 #ifdef AFS_LINUX26_ENV
diff --git a/src/rx/rx_lwp.c b/src/rx/rx_lwp.c
index 8c76ab9..1a5e7ee 100644
--- a/src/rx/rx_lwp.c
+++ b/src/rx/rx_lwp.c
@@ -428,7 +428,18 @@ rxi_Listen(osi_socket sock)
 int
 rxi_Recvmsg(int socket, struct msghdr *msg_p, int flags)
 {
-    return recvmsg((int)socket, msg_p, flags);
+#ifdef HAVE_OPENVZ
+	struct ve_struct *ve;
+	int ret;
+
+	ve = set_exec_env(get_ve0());
+	ret = recvmsg((int)socket, msg_p, flags);
+	(void)set_exec_env(ve);
+
+	return ret;
+#else
+	return recvmsg((int)socket, msg_p, flags);
+#endif
 }

 /*
@@ -438,6 +449,10 @@ rxi_Recvmsg(int socket, struct msghdr *msg_p, int flags)
 int
 rxi_Sendmsg(osi_socket socket, struct msghdr *msg_p, int flags)
 {
+#ifdef HAVE_OPENVZ
+	struct ve_struct *ve;
+	ve = set_exec_env(get_ve0());
+#endif
     fd_set *sfds = (fd_set *) 0;
     while (sendmsg(socket, msg_p, flags) == -1) {
 	int err;
@@ -446,6 +461,9 @@ rxi_Sendmsg(osi_socket socket, struct msghdr *msg_p, int flags)
 	if (!sfds) {
 	    if (!(sfds = IOMGR_AllocFDSet())) {
 		(osi_Msg "rx failed to alloc fd_set: ");
+#ifdef HAVE_OPENVZ
+		(void)set_exec_env(ve);
+#endif
 		perror("rx_sendmsg");
 		return -1;
 	    }
@@ -465,14 +483,24 @@ rxi_Sendmsg(osi_socket socket, struct msghdr *msg_p, int flags)
 	{
 	    (osi_Msg "rx failed to send packet: ");
 	    perror("rx_sendmsg");
+#ifdef HAVE_OPENVZ
+		(void)set_exec_env(ve);
+#endif
 	    return -1;
 	}
 	while ((err = select(socket + 1, 0, sfds, 0, 0)) != 1) {
-	    if (err >= 0 || errno != EINTR)
+	    if (err >= 0 || errno != EINTR) {
+#ifdef HAVE_OPENVZ
+		(void)set_exec_env(ve);
+#endif
 		osi_Panic("rxi_sendmsg: select error %d.%d", err, errno);
+	    }
 	}
     }
     if (sfds)
 	IOMGR_FreeFDSet(sfds);
+#ifdef HAVE_OPENVZ
+	(void)set_exec_env(ve);
+#endif
     return 0;
 }
diff --git a/src/rx/rx_pthread.c b/src/rx/rx_pthread.c
index f69bdb6..64b68c2 100644
--- a/src/rx/rx_pthread.c
+++ b/src/rx/rx_pthread.c
@@ -401,7 +401,14 @@ int
 rxi_Recvmsg(int socket, struct msghdr *msg_p, int flags)
 {
     int ret;
+#ifdef HAVE_OPENVZ
+	struct ve_struct *ve;
+	ve = set_exec_env(get_ve0());
+#endif
     ret = recvmsg(socket, msg_p, flags);
+#ifdef HAVE_OPENVZ
+	(void)set_exec_env(ve);
+#endif
     return ret;
 }

@@ -411,8 +418,15 @@ rxi_Recvmsg(int socket, struct msghdr *msg_p, int flags)
 int
 rxi_Sendmsg(osi_socket socket, struct msghdr *msg_p, int flags)
 {
+#ifdef HAVE_OPENVZ
+	struct ve_struct *ve;
+	ve = set_exec_env(get_ve0());
+#endif
     int ret;
     ret = sendmsg(socket, msg_p, flags);
+#ifdef HAVE_OPENVZ
+    (void)set_exec_env(ve);
+#endif
 #ifdef AFS_LINUX22_ENV
     /* linux unfortunately returns ECONNREFUSED if the target port
      * is no longer in use */

----------------------------------- [ patch ends here ] -----------------------------------
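
For reference, the pattern applied around every socket call in the rx
changes above boils down to the following. This is only a minimal sketch:
set_exec_env(), get_ve0() and struct ve_struct come from the OpenVZ kernel
patch (<linux/ve.h>), while do_net_io() is a hypothetical stand-in for the
real call (sock_sendmsg()/sock_recvmsg() in rx_knet.c, sendmsg()/recvmsg()
in rx_lwp.c and rx_pthread.c):

#ifdef HAVE_OPENVZ
/* Run one network operation from the Hardware Node's (VE0) namespace and
 * restore the caller's execution environment afterwards. */
static int
osi_openvz_net_op(struct socket *sop, struct msghdr *msg, size_t size)
{
    struct ve_struct *old_ve;
    int code;

    old_ve = set_exec_env(get_ve0());  /* enter VE0, remember previous env */
    code = do_net_io(sop, msg, size);  /* hypothetical wrapped call */
    (void)set_exec_env(old_ve);        /* switch back before returning */

    return code;
}
#endif

In rx_lwp.c and rx_pthread.c there is no single exit point, so the patch
repeats the restoring set_exec_env(ve) call on each return path instead of
using a helper like the one above.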

2009/2/25 Night Nord <nightnord@gmail.com>:
> OpenVZ (www.openvz.org) is a virtualization solution - essentially a very
> advanced chroot jail - in which multiple virtual servers (CTs, containers)
> share the same kernel, but all operations run in separate namespaces,
> including the network namespace. This scheme allows very cheap and secure
> virtualization, but it requires special support in every kernel
> subsystem/module. This patch allows the openafs-1.4.8 AFS client (the
> libafs kernel module) to be used in such an environment. The patch does
> not actually virtualize the module; it lets it build correctly and makes
> it possible to `mount --bind` /afs from the 'real system' (the Hardware
> Node - HN - in OpenVZ terminology) into containers (which are like
> chroots, but much more complex).
>
> In detail:
>
> 1) Build system:
>        Check in acinclude.m4 and src/cf/linux-test4.m4 whether the OpenVZ
>        container scheme is enabled (HAVE_OPENVZ).
>
> 2) src/afs/LINUX/osi_groups.c:
>        Use find_task_by_pid_all(1) to get the HN's 'init', not the one
>        from the container.
>
> 3) src/afs/afs_osi.c:
>        Use for_each_process_all so that authentication tokens are stored
>        for all processes, not only the HN's. (Necessary to build.)
>
> 4) src/rx/LINUX/rx_knet.c, src/rx/rx_lwp.c, src/rx/rx_pthread.c:
>        Make it bindable into containers. The idea is simple: sending and
>        receiving requests must be done from the HN's namespace, where all
>        the other work happens; otherwise you will see files from inside
>        the container, but every operation will time out. So we simply
>        switch into the HN's (aka VE0 - virtual environment 0) namespace
>        before sending, and switch back before any return.
>

-- 
Night Nord