[OpenAFS] volume_top.d
Robert Milkowski
rmilkowski@task.gda.pl
Thu, 25 Oct 2012 23:48:27 +0100
Hi,
A couple of people asked me about the dtrace scripts I referred to during my
presentation.
Below is one of them - it is a very quickly written script, only tested on
our 1.4.11 based tree (with lots of patches, specifically DAFS), but there
is a good chance it will just work on 1.4.11+ and maybe even on 1.6 (I
haven't tried it though). It makes some very specific assumptions about how
a given release of OpenAFS works, and it might not work correctly or at all
for you - although it should be trivial to fix it then.
Please use it at your own risk.
The script requires two arguments - pid of fileserver, and how often it
should print its output (for example 5s).
It then clears the terminal at the specified interval and prints all the
volumes which were read or written to since the script was started, sorted by
the number of read MBs.
Example output:
# ./volume_top.d `pgrep fileserver` 5s
[...]
Mountpoint          VolID     Read[MB] Wrote[MB]
=============== ============ ========= =========
/vicepa            542579958       100        10
/vicepa            536904476         0        24
/vicepb            536874428         0         0
                ============ ========= =========
                                   100        34

   started: 2010 Nov  8 16:16:01
   current: 2010 Nov  8 16:25:46
$ cat volume_top.d
#!/usr/sbin/dtrace -qCs
/* OpenAFS-style integer aliases referenced by the structure declarations. */
typedef int afs_int32;
typedef unsigned int afs_uint32;
typedef long long afs_int64;

/* Aliases layered on top of the basic integer types. */
typedef afs_uint32 bit32;	/* Unsigned, 32 bits */
typedef bit32 Device;		/* Unix device number */
typedef afs_uint32 VolumeId;
/*
 * Pair of queue link pointers (previous element, next element).
 * Embedded as the first member of struct Volume.
 */
struct rx_queue {
	struct rx_queue *prev;
	struct rx_queue *next;
};
/*
 * Leading members of the fileserver's per-partition descriptor.
 *
 * IT IS NOT a complete structure definition: only the members up to and
 * including f_files are declared here. The probe clauses copy this many
 * bytes from the traced process and read the `name` member.
 */
struct DiskPartition64 {
	struct DiskPartition64 *next;

	char *name;		/* Mounted partition name */
	char *devName;		/* Device mounted on */
	Device device;		/* Device number */
	afs_int32 index;	/* Partition index (0<=x<=VOLMAXPARTS) */

	/*
	 * File descriptor of this partition if locked; otherwise -1.
	 * Not used by the file server.
	 */
	int lock_fd;

	/*
	 * Total number of blocks (1K) presumed available on this partition
	 * (accounting for the minfree parameter for the partition). This is
	 * adjusted approximately by the sizes of files and directories
	 * read/written, and periodically the superblock is read and this is
	 * recomputed. This number can be negative, if the partition starts
	 * out too full.
	 */
	afs_int64 free;

	/*
	 * Total number of blocks available on this partition, taking into
	 * account the minfree parameter for the partition (see the 4.2bsd
	 * command tunefs, but note that the bug mentioned there--that the
	 * superblock is not reread--does not apply here. The superblock is
	 * re-read periodically by VSetPartitionDiskUsage().)
	 */
	afs_int64 totalUsable;

	/* Number of blocks to be kept free, as last read from the superblock */
	afs_int64 minFree;

	int flags;
	afs_int64 f_files;	/* Total number of files in this partition */
};
/*
 * Leading members of the fileserver's in-memory volume descriptor.
 *
 * It is not the entire structure: only the members up to and including
 * `partition` are declared. The probe clauses copy this many bytes from the
 * traced process and read `hashid` and `partition`.
 *
 * Declared as a plain struct: the original `typedef struct Volume {...};`
 * named no alias, so the `typedef` keyword had no effect, and every use in
 * this script refers to the type as `struct Volume`.
 */
struct Volume {
	struct rx_queue q;		/* Volume hash chain pointers */
	VolumeId hashid;		/* Volume number -- for hash table lookup */
	void *header;			/* Cached disk data - FAKED TYPE */
	Device device;			/* Unix device for the volume */
	struct DiskPartition64
	    *partition;			/* Information about the Unix partition */
};
/*
 * Runs once when tracing starts: remember the start time.
 * start_walltimestamp is printed as the "started" time by the tick clause;
 * start_timestamp is recorded but not referenced by any other clause visible
 * in this script.
 */
BEGIN
{
	start_timestamp = timestamp;
	start_walltimestamp = walltimestamp;
}
/*
 * Entry to FetchData_RXStyle in the traced process ($1 is the fileserver
 * pid). Marks the thread as being inside a fetch and remembers which volume
 * and partition it is working on, so that readv() calls made by this thread
 * can be attributed to them.
 *
 * arg0 is treated as a user-space pointer to a struct Volume; both the
 * volume and the partition it points to are copied in from the traced
 * process. This relies on the partial structure declarations in this script
 * matching the layout of the running fileserver.
 */
pid$1:a.out:FetchData_RXStyle:entry
{
	self->fetchdata = 1;

	this->volume = (struct Volume *)copyin(arg0, sizeof (struct Volume));
	this->partition = (struct DiskPartition64 *)copyin(
	    (uintptr_t)this->volume->partition,
	    sizeof (struct DiskPartition64));

	self->volumeid = this->volume->hashid;
	self->partition_name = copyinstr((uintptr_t)this->partition->name);
}
/*
 * Return from FetchData_RXStyle on a thread that was marked on entry:
 * clear the per-thread fetch state so later readv() calls on this thread
 * are no longer counted.
 */
pid$1:a.out:FetchData_RXStyle:return
/ self->fetchdata /
{
	self->fetchdata = 0;
	self->volumeid = 0;
	self->partition_name = 0;
}
/*
 * readv() returning on a thread that is inside FetchData_RXStyle: add the
 * number of bytes returned to the per-(partition, volume) read counter and
 * to the grand total.
 *
 * arg0 is the syscall return value. A failed readv() returns -1, which
 * must not be summed into the byte counters, hence the arg0 >= 0 guard.
 */
syscall::readv:return
/ self->fetchdata && (int64_t)arg0 >= 0 /
{
	@rx_fetchdata[self->partition_name, self->volumeid] = sum(arg0);
	@rx_fetchdata_total = sum(arg0);
}
/*
 * Entry to StoreData_RXStyle in the traced process ($1 is the fileserver
 * pid). Marks the thread as being inside a store and remembers which volume
 * and partition it is working on, so that writev() calls made by this
 * thread can be attributed to them.
 *
 * arg0 is treated as a user-space pointer to a struct Volume; both the
 * volume and the partition it points to are copied in from the traced
 * process. This relies on the partial structure declarations in this script
 * matching the layout of the running fileserver.
 */
pid$1:a.out:StoreData_RXStyle:entry
{
	self->storedata = 1;

	this->volume = (struct Volume *)copyin(arg0, sizeof (struct Volume));
	this->partition = (struct DiskPartition64 *)copyin(
	    (uintptr_t)this->volume->partition,
	    sizeof (struct DiskPartition64));

	self->volumeid = this->volume->hashid;
	self->partition_name = copyinstr((uintptr_t)this->partition->name);
}
/*
 * Return from StoreData_RXStyle on a thread that was marked on entry:
 * clear the per-thread store state so later writev() calls on this thread
 * are no longer counted.
 */
pid$1:a.out:StoreData_RXStyle:return
/ self->storedata /
{
	self->storedata = 0;
	self->volumeid = 0;
	self->partition_name = 0;
}
/*
 * writev() returning on a thread that is inside StoreData_RXStyle: add the
 * number of bytes written to the per-(partition, volume) write counter and
 * to the grand total.
 *
 * arg0 is the syscall return value. A failed writev() returns -1, which
 * must not be summed into the byte counters, hence the arg0 >= 0 guard.
 */
syscall::writev:return
/ self->storedata && (int64_t)arg0 >= 0 /
{
	@rx_storedata[self->partition_name, self->volumeid] = sum(arg0);
	@rx_storedata_total = sum(arg0);
}
/*
 * Periodic report; $2 is the interval (for example 5s).
 *
 * Clears the terminal, scales all byte counters to MB and prints one row
 * per (partition, volume) key followed by the grand totals and the start
 * and current wall-clock times. The aggregations are not cleared, so the
 * figures are cumulative since the script started.
 *
 * The header and separator rows use the same field widths as the printa()
 * row format (15, 12, 9, 9) so that the columns line up.
 */
tick-$2
{
	/* Reverse the aggregation sort order for the printa() calls below. */
	setopt("aggsortrev", "true");

	/* ANSI escape sequences: cursor home, then clear to end of screen. */
	printf("\033[H\033[J");

	/* Report in MB; normalize() only affects how values are displayed. */
	normalize(@rx_fetchdata, 1024*1024);
	normalize(@rx_storedata, 1024*1024);
	normalize(@rx_fetchdata_total, 1024*1024);
	normalize(@rx_storedata_total, 1024*1024);

	printf("%-15s %12s %9s %9s\n",
	    "Mountpoint", "Volume ID", "Read[MB]", "Wrote[MB]");
	printf("=============== ============ ========= =========\n");
	printa("%-15s %12d %@9d %@9d\n", @rx_fetchdata, @rx_storedata);

	/* Totals: leave the two key columns blank. */
	printf("%-15s %12s %9s %9s\n",
	    "", "============", "=========", "=========");
	printf("%-15s %12s ", " ", " ");
	printa("%@9d %@9d\n", @rx_fetchdata_total, @rx_storedata_total);

	printf("\n\n");
	printf(" started: %Y\n", start_walltimestamp);
	printf(" current: %Y\n", walltimestamp);
}
-- 
Robert Milkowski
http://milek.blogspot.com