[OpenAFS] openafs high availability problem

sophana sophana@zizi.ath.cx
Wed, 29 Aug 2007 23:11:07 +0200


Derrick Brashear a écrit :
> This provides an interface to tweak the bit shift used to increment
> the timeouts (currently it shifts by
> 8, e.g. an increment of 2^8 or 256 milliseconds, I suggest setting to
> 7 to reduce it)
>
> Probably needs minor hand tweaking to apply, notably since we are
> already using sysctl 5 on Linux.
>
> Index: src/afs/LINUX/osi_sysctl.c
> ===================================================================
> RCS file: /cvs/openafs/src/afs/LINUX/osi_sysctl.c,v
> retrieving revision 1.2.4.3 <http://1.2.4.3>
> diff -u -r1.2.4.3 osi_sysctl.c
> --- src/afs/LINUX/osi_sysctl.c 10 Jul 2002 20:42:33 -0000 1.2.4.3
> <http://1.2.4.3>
> +++ src/afs/LINUX/osi_sysctl.c 5 Jan 2004 06:27:57 -0000
> @@ -20,6 +20,7 @@
> extern afs_int32 hm_retry_RO;
> extern afs_int32 hm_retry_RW;
> extern afs_int32 hm_retry_int;
> +extern afs_int32 retrans_shift;
>
> #ifdef CONFIG_SYSCTL
> static struct ctl_table_header *afs_sysctl = NULL;
> @@ -36,6 +37,9 @@
> &proc_dointvec},
> {4, "GCPAGs",
> &afs_gcpags, sizeof(afs_int32), 0644, NULL,
> + &proc_dointvec},
> + {4, "retrans_shift",
> + &retrans_shift, sizeof(afs_int32), 0644, NULL,
> &proc_dointvec},
> {0}
> };
> Index: src/rx/rx.c
> ===================================================================
> RCS file: /cvs/openafs/src/rx/rx.c,v
> retrieving revision 1.22.2.23 <http://1.22.2.23>
> diff -u -r1.22.2.23 rx.c
> --- src/rx/rx.c 5 Jun 2003 21:42:41 -0000 1.22.2.23 <http://1.22.2.23>
> +++ src/rx/rx.c 5 Jan 2004 06:28:15 -0000
> @@ -115,6 +115,8 @@
> } rx_tq_debug;
> #endif /* AFS_GLOBAL_RXLOCK_KERNEL */
>
> +afs_int32 retrans_shift = 8;
> +
> /*
> * rxi_rpc_peer_stat_cnt counts the total number of peer stat structures
> * currently allocated within rx. This number is used to allocate the
> @@ -3572,7 +3574,7 @@
> tp->retryTime = tp->timeSent;
> clock_Add(&tp->retryTime, &peer->timeout);
> /* shift by eight because one quarter-sec ~ 256 milliseconds */
> - clock_Addmsec(&(tp->retryTime), ((afs_uint32) tp->backoff) << 8);
> + clock_Addmsec(&(tp->retryTime), ((afs_uint32) tp->backoff) <<
> retrans_shift);
> }
> }
>
> @@ -4658,7 +4660,7 @@
> }
> else list[i]->backoff++;
> clock_Addmsec(&(list[i]->retryTime),
> - ((afs_uint32) list[i]->backoff) << 8);
> + ((afs_uint32) list[i]->backoff) << retrans_shift);
> }
>
> /* Wait a little extra for the ack on the last packet */
> Index: src/rx/rx_packet.c
> ===================================================================
> RCS file: /cvs/openafs/src/rx/rx_packet.c,v
> retrieving revision 1.14.2.6 <http://1.14.2.6>
> diff -u -r1.14.2.6 rx_packet.c
> --- src/rx/rx_packet.c 23 May 2003 06:52:31 -0000 1.14.2.6
> <http://1.14.2.6>
> +++ src/rx/rx_packet.c 5 Jan 2004 06:28:17 -0000
> @@ -100,6 +100,7 @@
>
> static void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket
> asocket,
> afs_int32 ahost, short aport, afs_int32 istack);
> +extern afs_int32 retrans_shift;
>
> /* some rules about packets:
> * 1. When a packet is allocated, the final iov_buf contains room for
> @@ -1558,7 +1559,7 @@
> rx_stats.netSendFailures++;
> MUTEX_EXIT(&rx_stats_mutex);
> p->retryTime = p->timeSent; /* resend it very soon */
> - clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
> + clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) <<
> retrans_shift));
>
> #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
> /* Linux is nice -- it can tell us right away that we cannot
> @@ -1732,7 +1733,7 @@
> for (i = 0 ; i < len ; i++) {
> p = list[i];
> p->retryTime = p->timeSent; /* resend it very soon */
> - clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) << 8));
> + clock_Addmsec(&(p->retryTime), 10 + (((afs_uint32) p->backoff) <<
> retrans_shift));
> }
> #if defined(KERNEL) && defined(AFS_LINUX20_ENV)
> /* Linux is nice -- it can tell us right away that we cannot
> Index: src/viced/viced.c
> ===================================================================
> RCS file: /cvs/openafs/src/viced/viced.c,v
> retrieving revision 1.11.2.11 <http://1.11.2.11>
> diff -u -r1.11.2.11 viced.c
> --- src/viced/viced.c 6 Aug 2003 02:58:43 -0000 1.11.2.11
> <http://1.11.2.11>
> +++ src/viced/viced.c 5 Jan 2004 06:28:21 -0000
> @@ -102,6 +102,8 @@
> extern int RXAFS_ExecuteRequest();
> extern int RXSTATS_ExecuteRequest();
>
> +extern afs_int32 retrans_shift;
> +
> int eventlog = 0, rxlog = 0;
> FILE *debugFile;
> FILE * console = NULL;
> @@ -1079,6 +1081,12 @@
> int bufSize = 0; /* temp variable to read in udp socket buf size*/
>
> for (i = 1; i < argc; i++) {
> + if (!strcmp(argv[i], "-rexmit")) {
> + int rxshift = atoi(argv[++i]);
> + if ((rxshift > 0) && (rxshift < 9))
> + retrans_shift = rxshift;
> + }
> + else
> if (!strcmp(argv[i], "-d")) {
> debuglevel =3D atoi(argv[++i]);
> LogLevel =3D debuglevel;
>
Will that patch apply easily to the 1.4.4 source?
It seems that the patch adds a new parameter, "retrans_shift", in osi_sysctl.c;
how do you set this parameter?
If I set it to 7, what value will that bring the switching timeout to?

Thanks