[OpenAFS-devel] Re: [Patch] Support OpenVZ virtualization scheme for openafs-1.4.8
kernel module
Night Nord
NightNord@gmail.com
Wed, 25 Feb 2009 22:40:16 +0300
Sorry, I didn't know. openafs-bugs is now bcc'd (the introduction is in the re-quote below).
Here is the patch itself:
Sorry, I didn't know. openafs-bugs is now bcc'd (the introduction is in the re-quote below).
Here is the patch itself:
------------------------------------------ [ cut here ]
-----------------------------------------------
diff --git a/acinclude.m4 b/acinclude.m4
index 4020544..97893a0 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -773,6 +773,7 @@ case $AFS_SYSNAME in *_linux* | *_umlinux*)
LINUX_EXPORTS_SYS_OPEN
LINUX_EXPORTS_SYS_WAIT4
LINUX_EXPORTS_RCU_READ_LOCK
+ LINUX_OPENVZ
LINUX_WHICH_MODULES
if test "x$ac_cv_linux_config_modversions" =3D "xno"
-o $AFS_SYSKVERS -ge 26; then
AC_MSG_WARN([Cannot determine sys_call_table
status. assuming it isn't exported])
diff --git a/src/afs/LINUX/osi_groups.c b/src/afs/LINUX/osi_groups.c
index 612d058..b039c18 100644
--- a/src/afs/LINUX/osi_groups.c
+++ b/src/afs/LINUX/osi_groups.c
@@ -646,7 +646,10 @@ void osi_keyring_init(void)
# endif
rcu_read_lock();
# endif
-#if defined(EXPORTED_FIND_TASK_BY_PID)
+
+#if defined(HAVE_OPENVZ)
+ p =3D find_task_by_pid_all(1);
+#elif defined(EXPORTED_FIND_TASK_BY_PID)
p =3D find_task_by_pid(1);
#else
p =3D find_task_by_vpid(1);
diff --git a/src/afs/afs_osi.c b/src/afs/afs_osi.c
index 0ebb913..b9a00ff 100644
--- a/src/afs/afs_osi.c
+++ b/src/afs/afs_osi.c
@@ -853,19 +853,13 @@ afs_osi_TraverseProcTable()
rcu_read_lock();
#endif /* LINUX_VERSION_CODE >=3D KERNEL_VERSION(2,6,16) */
-#ifdef DEFINED_FOR_EACH_PROCESS
+#if defined(HAVE_OPENVZ)
+ for_each_process_all(p) if (p->pid) {
+#elif defined(DEFINED_FOR_EACH_PROCESS)
for_each_process(p) if (p->pid) {
-#ifdef STRUCT_TASK_STRUCT_HAS_EXIT_STATE
- if (p->exit_state)
- continue;
-#else
- if (p->state & TASK_ZOMBIE)
- continue;
-#endif
- afs_GCPAGs_perproc_func(p);
- }
#else
for_each_task(p) if (p->pid) {
+#endif
#ifdef STRUCT_TASK_STRUCT_HAS_EXIT_STATE
if (p->exit_state)
continue;
@@ -875,7 +869,6 @@ afs_osi_TraverseProcTable()
#endif
afs_GCPAGs_perproc_func(p);
}
-#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) &&
defined(EXPORTED_TASKLIST_LOCK)
if (&tasklist_lock)
read_unlock(&tasklist_lock);
diff --git a/src/cf/linux-test4.m4 b/src/cf/linux-test4.m4
index 801f6d5..4fdacad 100644
--- a/src/cf/linux-test4.m4
+++ b/src/cf/linux-test4.m4
@@ -1057,3 +1057,19 @@ AC_DEFUN([LINUX_HAVE_WRITE_BEGIN_AOP], [
if test "x$ac_cv_linux_write_begin" =3D "xyes"; then
AC_DEFINE([HAVE_WRITE_BEGIN], 1, [define if your kernel has a
write_begin() address space op])
fi])
+
+AC_DEFUN([LINUX_OPENVZ], [
+ AC_MSG_CHECKING([for linux OpenVZ project extensions])
+ AC_CACHE_VAL([ac_cv_linux_openvz], [
+ AC_TRY_KBUILD(
+[#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/capability.h>
+#include <linux/ve.h>],
+[get_ve(NULL);],
+ ac_cv_linux_openvz=3Dyes,
+ ac_cv_linux_openvz=3Dno)])
+ AC_MSG_RESULT($ac_cv_linux_openvz)
+ if test "x$ac_cv_linux_openvz" =3D "xyes"; then
+ AC_DEFINE([HAVE_OPENVZ], 1, [define if your kernel have
configured OpenVZ extensions])
+ fi])
diff --git a/src/rx/LINUX/rx_knet.c b/src/rx/LINUX/rx_knet.c
index 59f982e..05c8533 100644
--- a/src/rx/LINUX/rx_knet.c
+++ b/src/rx/LINUX/rx_knet.c
@@ -102,6 +102,10 @@ osi_NetSend(osi_socket sop, struct sockaddr_in
*to, struct iovec *iovec,
struct msghdr msg;
int code;
+#ifdef HAVE_OPENVZ
+ struct ve_struct *ve;
+#endif
+
msg.msg_iovlen =3D iovcnt;
msg.msg_iov =3D iovec;
msg.msg_name =3D to;
@@ -110,9 +114,15 @@ osi_NetSend(osi_socket sop, struct sockaddr_in
*to, struct iovec *iovec,
msg.msg_controllen =3D 0;
msg.msg_flags =3D 0;
+#ifdef HAVE_OPENVZ
+ ve =3D set_exec_env(get_ve0());
+#endif
TO_USER_SPACE();
code =3D sock_sendmsg(sop, &msg, size);
TO_KERNEL_SPACE();
+#ifdef HAVE_OPENVZ
+ (void)set_exec_env(ve);
+#endif
return (code < 0) ? code : 0;
}
@@ -147,6 +157,9 @@ osi_NetReceive(osi_socket so, struct sockaddr_in
*from, struct iovec *iov,
int code;
struct iovec tmpvec[RX_MAXWVECS + 2];
struct socket *sop =3D (struct socket *)so;
+#ifdef HAVE_OPENVZ
+ struct ve_struct *ve;
+#endif
if (iovcnt > RX_MAXWVECS + 2) {
osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", iovcnt);
@@ -159,9 +172,15 @@ osi_NetReceive(osi_socket so, struct sockaddr_in
*from, struct iovec *iov,
msg.msg_controllen =3D 0;
msg.msg_flags =3D 0;
+#ifdef HAVE_OPENVZ
+ ve =3D set_exec_env(get_ve0());
+#endif
TO_USER_SPACE();
code =3D sock_recvmsg(sop, &msg, *lengthp, 0);
TO_KERNEL_SPACE();
+#ifdef HAVE_OPENVZ
+ (void)set_exec_env(ve);
+#endif
if (code < 0) {
#ifdef AFS_LINUX26_ENV
diff --git a/src/rx/rx_lwp.c b/src/rx/rx_lwp.c
index 8c76ab9..1a5e7ee 100644
--- a/src/rx/rx_lwp.c
+++ b/src/rx/rx_lwp.c
@@ -428,7 +428,18 @@ rxi_Listen(osi_socket sock)
int
rxi_Recvmsg(int socket, struct msghdr *msg_p, int flags)
{
- return recvmsg((int)socket, msg_p, flags);
+#ifdef HAVE_OPENVZ
+ struct ve_struct *ve;
+ int ret;
+
+ ve =3D set_exec_env(get_ve0());
+ ret =3D recvmsg((int)socket, msg_p, flags);
+ (void)set_exec_env(ve);
+
+ return ret;
+#else
+ return recvmsg((int)socket, msg_p, flags);
+#endif
}
/*
@@ -438,6 +449,10 @@ rxi_Recvmsg(int socket, struct msghdr *msg_p, int flag=
s)
int
rxi_Sendmsg(osi_socket socket, struct msghdr *msg_p, int flags)
{
+#ifdef HAVE_OPENVZ
+ struct ve_struct *ve;
+ ve =3D set_exec_env(get_ve0());
+#endif
fd_set *sfds =3D (fd_set *) 0;
while (sendmsg(socket, msg_p, flags) =3D=3D -1) {
int err;
@@ -446,6 +461,9 @@ rxi_Sendmsg(osi_socket socket, struct msghdr
*msg_p, int flags)
if (!sfds) {
if (!(sfds =3D IOMGR_AllocFDSet())) {
(osi_Msg "rx failed to alloc fd_set: ");
+#ifdef HAVE_OPENVZ
+ (void)set_exec_env(ve);
+#endif
perror("rx_sendmsg");
return -1;
}
@@ -465,14 +483,24 @@ rxi_Sendmsg(osi_socket socket, struct msghdr
*msg_p, int flags)
{
(osi_Msg "rx failed to send packet: ");
perror("rx_sendmsg");
+#ifdef HAVE_OPENVZ
+ (void)set_exec_env(ve);
+#endif
return -1;
}
while ((err =3D select(socket + 1, 0, sfds, 0, 0)) !=3D 1) {
- if (err >=3D 0 || errno !=3D EINTR)
+ if (err >=3D 0 || errno !=3D EINTR) {
+#ifdef HAVE_OPENVZ
+ (void)set_exec_env(ve);
+#endif
osi_Panic("rxi_sendmsg: select error %d.%d", err, errno);
+ }
}
}
if (sfds)
IOMGR_FreeFDSet(sfds);
+#ifdef HAVE_OPENVZ
+ (void)set_exec_env(ve);
+#endif
return 0;
}
diff --git a/src/rx/rx_pthread.c b/src/rx/rx_pthread.c
index f69bdb6..64b68c2 100644
--- a/src/rx/rx_pthread.c
+++ b/src/rx/rx_pthread.c
@@ -401,7 +401,14 @@ int
rxi_Recvmsg(int socket, struct msghdr *msg_p, int flags)
{
int ret;
+#ifdef HAVE_OPENVZ
+ struct ve_struct *ve;
+ (void)set_exec_env(ve);
+#endif
ret =3D recvmsg(socket, msg_p, flags);
+#ifdef HAVE_OPENVZ
+ (void)set_exec_env(ve);
+#endif
return ret;
}
@@ -411,8 +418,15 @@ rxi_Recvmsg(int socket, struct msghdr *msg_p, int flag=
s)
int
rxi_Sendmsg(osi_socket socket, struct msghdr *msg_p, int flags)
{
+#ifdef HAVE_OPENVZ
+ struct ve_struct *ve
+ ve =3D set_exec_env(get_ve0());
+#endif
int ret;
ret =3D sendmsg(socket, msg_p, flags);
+#ifdef HAVE_OPENVZ
+ (void)set_exec_env(ve);
+#endif
#ifdef AFS_LINUX22_ENV
/* linux unfortunately returns ECONNREFUSED if the target port
* is no longer in use */
----------------------------------- [ patch ends here ]
----------------------------------------
2009/2/25 Night Nord <nightnord@gmail.com>:
> OpenVZ (www.openvz.org) is a virtualization solution - it looks like a very
> advanced chroot jail - in which multiple virtual servers (CT - containers)
> share the same kernel, but all operations are executed in different
> namespaces. This includes the net namespace. Such a scheme allows very cheap
> and secure virtualization, but requires special support in every kernel
> subsystem/module. This patch allows the openafs-1.4.8 AFS client (libafs
> kernel module) to be used in such an environment. The patch does not actually
> virtualize the module, but allows it to be built correctly and allows /afs
> to be `mount --bind`-ed from the 'real system' (Hardware Node - HN - in
> OpenVZ terminology) into containers (which are like chroots, but much more
> complex).
>
> Detailed:
>
> 1) Build-system:
>        Add a check to acinclude.m4 and src/cf/linux-test4.m4 for an enabled
>        OpenVZ container scheme (HAVE_OPENVZ).
>
> 2) src/afs/LINUX/osi_groups.c:
>        Use find_task_by_pid_all(1) to get the HN's 'init', not the one from
>        a container.
>
> 3) src/afs/afs_osi.c:
>        Use for_each_process_all to support storing authentication tokens for
>        all processes, not only the HN's. (Necessary to build)
> 4) src/rx/LINUX/rx_knet.c, src/rx/rx_lwp.c, src/rx/rx_pthread.c:
>        Make /afs bindable into containers. The idea is simple: sending
>        requests and receiving replies must be done from the HN's namespace,
>        where all the other work is actually done; otherwise you will see
>        files from the container, but every operation will time out. So we
>        just switch into the HN's (aka VE0 - virtual environment 0) namespace
>        before sending, and then switch back before any return.
>
--=20
Night Nord