[OpenAFS] volume_top.d

Robert Milkowski rmilkowski@task.gda.pl
Thu, 25 Oct 2012 23:48:27 +0100


Hi,

  A couple of people asked me about the DTrace scripts I referred to during
my presentation.
Below is one of them - it is a very quickly written script, only tested on
our 1.4.11-based tree (with lots of patches, specifically DAFS), but there
is a good chance it will just work on 1.4.11+ and maybe even on 1.6 (I
haven't tried it though). It makes some very specific assumptions about how
a given release of OpenAFS works, and it might not work correctly or at all
for you - although it should be trivial to fix it then.

Please use it at your own risk.

The script requires two arguments - the pid of the fileserver, and how often
it should print its output (for example 5s).
It then clears the terminal at the specified interval and prints all the
volumes which were read from or written to since the script was started,
sorted by the number of MBs read.

Example output:


# ./volume_top.d `pgrep fileserver` 5s
[...]

Mountpoint             VolID  Read[MB] Wrote[MB]
=============== ============ ========= =========
/vicepa            542579958       100        10
/vicepa            536904476         0        24
/vicepb            536874428         0         0
                ============ ========= =========
                                   100        34


   started: 2010 Nov  8 16:16:01
   current: 2010 Nov  8 16:25:46




$ cat volume_top.d
#!/usr/sbin/dtrace -qCs

/*
 * volume_top.d - per-volume read/write byte totals for an OpenAFS fileserver.
 *
 * Usage: ./volume_top.d <fileserver pid> <interval>     (e.g. 5s)
 *
 *   $1  pid of the fileserver process (used in the pid provider probes)
 *   $2  reporting interval (used as the tick-$2 probe suffix)
 *
 * The type and structure declarations below are hand-copied from OpenAFS
 * so that copyin()'d memory from the fileserver can be interpreted. They
 * must match the layout used by the traced fileserver binary.
 */

/* Basic OpenAFS integer types used by the structure declarations below. */
typedef unsigned int afs_uint32;
typedef long long afs_int64;
typedef int afs_int32;

typedef afs_uint32 VolumeId;    /* type of Volume.hashid, the reported volume ID */
typedef afs_uint32 bit32;       /* Unsigned, 32 bits */
typedef bit32 Device;           /* Unix device number */

/*
 * Queue link pair. The script never follows these pointers; the type is
 * declared only because struct Volume embeds it as its first member, so
 * its size determines the offsets of the members that follow.
 */
struct rx_queue {
    struct rx_queue *prev;
    struct rx_queue *next;
};

/*
 * Leading members of the fileserver's per-partition structure. The probe
 * clauses copyin() this structure from the traced process and read only
 * `name` from it (the mount point shown in the report). The declaration
 * is deliberately incomplete - see the note after the closing brace.
 */
struct DiskPartition64 {
    struct DiskPartition64 *next;
    char *name;                 /* Mounted partition name */
    char *devName;              /* Device mounted on */
    Device device;              /* device number */
    afs_int32 index;            /* partition index (0<=x<=VOLMAXPARTS) */
    int lock_fd;                /* File descriptor of this partition if
                                 * locked; otherwise -1;
                                 * Not used by the file server */
    afs_int64 free;                     /* Total number of blocks (1K)
                                 * presumed available on this partition
                                 * (accounting for the minfree parameter
                                 * for the partition).  This is adjusted
                                 * approximately by the sizes of files
                                 * and directories read/written, and
                                 * periodically the superblock is read and
                                 * this is recomputed.  This number can
                                 * be negative, if the partition starts
                                 * out too full */
    afs_int64 totalUsable;              /* Total number of blocks available
                                 * on this partition, taking into account
                                 * the minfree parameter for the partition
                                 * (see the 4.2bsd command tunefs, but note
                                 * that the bug mentioned there--that the
                                 * superblock is not reread--does not apply
                                 * here.  The superblock is re-read
                                 * periodically by
                                 * VSetPartitionDiskUsage().) */
    afs_int64 minFree;          /* Number blocks to be kept free, as last
                                 * read from the superblock */
    int flags;
    afs_int64 f_files;          /* total number of files in this partition */
}; /* IT IS NOT a complete structure definition */


/*
 * Leading members of the fileserver's Volume structure. The probe clauses
 * copyin() this from the traced process and read `hashid` (reported as the
 * volume ID) and `partition` (followed to obtain the mount point name).
 * Only the members up to `partition` are declared; the member order and
 * types must match the traced fileserver for the offsets to be right.
 *
 * Declared as a plain struct: the script refers to it only as
 * `struct Volume`, and the previous `typedef` had no alias name.
 */
struct Volume {
    struct rx_queue q;          /* Volume hash chain pointers */
    VolumeId hashid;            /* Volume number -- for hash table lookup */
    void *header;               /* Cached disk data - FAKED TYPE */
    Device device;              /* Unix device for the volume */
    struct DiskPartition64
     *partition;                /* Information about the Unix partition */

}; /* it is not the entire structure! */


/*
 * Fires once when tracing starts: remember when data collection began.
 */
BEGIN
{
  start_timestamp = timestamp;          /* stored; not read by any clause in this script */
  start_walltimestamp = walltimestamp;  /* printed as "started:" in the periodic report */
}


/*
 * The fileserver thread enters FetchData_RXStyle. Mark the thread as
 * "fetching" and remember which volume and partition it is working on,
 * so that readv() calls made by this thread can be attributed to them.
 *
 * NOTE: assumes arg0 is a pointer to the fileserver's Volume structure
 * with the layout declared in this script - this depends on the OpenAFS
 * release being traced.
 */
pid$1:a.out:FetchData_RXStyle:entry
{
  self->fetchdata = 1;

  /* Copy the Volume, then the partition it points to, out of the process. */
  this->volume = (struct Volume *)copyin(arg0, sizeof(struct Volume));
  this->partition = (struct DiskPartition64 *)copyin(
      (uintptr_t)this->volume->partition, sizeof(struct DiskPartition64));

  self->volumeid = this->volume->hashid;
  self->partition_name = copyinstr((uintptr_t)this->partition->name);
}

/*
 * FetchData_RXStyle returns on a thread that was marked as fetching:
 * clear the per-thread state so later readv() calls are not counted.
 */
pid$1:a.out:FetchData_RXStyle:return
/ self->fetchdata /
{
  self->fetchdata = 0;
  self->volumeid = 0;
  self->partition_name = 0;
}

/*
 * A readv() completes on a thread that is inside FetchData_RXStyle:
 * add the number of bytes returned (arg0) to the per-volume and overall
 * read totals.
 *
 * Only positive return values are counted. A failed readv() returns -1,
 * which would otherwise be summed into the byte counters.
 */
syscall::readv:return
/ self->fetchdata && arg0 > 0 /
{
  @rx_fetchdata[self->partition_name, self->volumeid] = sum(arg0);
  @rx_fetchdata_total = sum(arg0);
}

/*
 * The fileserver thread enters StoreData_RXStyle. Mark the thread as
 * "storing" and remember which volume and partition it is working on,
 * so that writev() calls made by this thread can be attributed to them.
 *
 * NOTE: assumes arg0 is a pointer to the fileserver's Volume structure
 * with the layout declared in this script - this depends on the OpenAFS
 * release being traced.
 */
pid$1:a.out:StoreData_RXStyle:entry
{
  self->storedata = 1;

  /* Copy the Volume, then the partition it points to, out of the process. */
  this->volume = (struct Volume *)copyin(arg0, sizeof(struct Volume));
  this->partition = (struct DiskPartition64 *)copyin(
      (uintptr_t)this->volume->partition, sizeof(struct DiskPartition64));

  self->volumeid = this->volume->hashid;
  self->partition_name = copyinstr((uintptr_t)this->partition->name);
}

/*
 * StoreData_RXStyle returns on a thread that was marked as storing:
 * clear the per-thread state so later writev() calls are not counted.
 */
pid$1:a.out:StoreData_RXStyle:return
/ self->storedata /
{
  self->storedata = 0;
  self->volumeid = 0;
  self->partition_name = 0;
}

/*
 * A writev() completes on a thread that is inside StoreData_RXStyle:
 * add the number of bytes written (arg0) to the per-volume and overall
 * write totals.
 *
 * Only positive return values are counted. A failed writev() returns -1,
 * which would otherwise be summed into the byte counters.
 */
syscall::writev:return
/ self->storedata && arg0 > 0 /
{
  @rx_storedata[self->partition_name, self->volumeid] = sum(arg0);
  @rx_storedata_total = sum(arg0);
}


/*
 * Periodic report, fired every $2 (the interval given on the command line).
 * Clears the terminal and prints one row per (mount point, volume ID) with
 * the megabytes read and written since the script started, followed by the
 * overall totals and the start/current wall-clock times.
 */
tick-$2
{
  /* Reverse the aggregation sort order for the printa() calls below. */
  setopt("aggsortrev", "true");

  /* ANSI escape sequences: cursor to home position, then clear the screen. */
  printf("\033[H\033[J");

  /* Report the byte sums in MB; the accumulated values are not modified. */
  normalize(@rx_fetchdata, 1024*1024);
  normalize(@rx_storedata, 1024*1024);
  normalize(@rx_fetchdata_total, 1024*1024);
  normalize(@rx_storedata_total, 1024*1024);

  printf("Mountpoint        Volume ID   Read[MB] Wrote[MB]\n");
  printf("=============== ============ ========= =========\n");

  /* Both aggregations share the same key tuple, so they print side by side. */
  printa("%-15s %12d %@9d %@9d\n", @rx_fetchdata, @rx_storedata);

  printf("                ============ ========= =========\n");

  /* Blank key columns so the totals line up under the MB columns. */
  printf("%-15s %12s ", " ", " ");
  printa("%@9d %@9d\n", @rx_fetchdata_total, @rx_storedata_total);

  printf("\n\n");
  printf("   started: %Y\n", start_walltimestamp);
  printf("   current: %Y\n", walltimestamp);

}



-- 
Robert Milkowski
http://milek.blogspot.com