[OpenAFS] butc segmentation fault in 1.4.0 volrestore

John W. Sopko Jr. sopko@cs.unc.edu
Tue, 10 Jan 2006 08:33:44 -0500


I updated from 1.2.13 to 1.4.0 a week ago. Everything seems to be working
fine, (great job everyone). We ran into a problem when doing a volume
restore. The butc process gets a segmentation fault. We have been doing
backup dumps using the 1.4.0 butc without a problem.

I tried 2 different tapes on 2 different tape drive on 2 different
machines. I used the 1.2.13 butc and it worked fine.

Below is the output from butc and the backup command, the backup tape log,
(backup/TL_st0), does not have any more information then the butc output.
There is not much information here. I also included an strace command
output from the point where butc prompts for the tape.

The restore seems to start working, there are no operating system errors
for the tape drive which is a SDLT320.

The volrestore starts and the tape drive starts to access the tape for
a minute or so and then the butc segfaults. I tried with and without
the -localauth flag. I will be happy to try whatever commands to help
debug this.


backup> volrestore quail i D3.prj.crs.cmp118s05.backup -extension .rs 
-date  1/9/2006 -portoffset 4

butc output:
------------

quail/root [/.../cs.unc.edu/home/houseman] # butc 3 -localauth -debuglevel 3
BUFFERSIZE is 16384 KBytes
Starting Tape Coordinator: Port offset 3   Debug level 3
Token expires: NEVER



Restore
Restoring volume D3.prj.crs.cmp118s05.rs
Prompt for tape quail_daily.Full.1 (1135951370)
******* OPERATOR ATTENTION *******
Device :  /dev/st0
Please put in tape quail_daily.Full.1 (1135951370) for reading and hit 
return when done

Thanks, now proceeding with tape reading operation.
**********************************
Proceeding with tape operation
Segmentation fault



strace output:
--------------
Restore
Restoring volume D3.home.sopko.rs
Prompt for tape toucan_daily.Full.1 (1135951397)
******* OPERATOR ATTENTION *******
Device :  /dev/st0
Please put in tape toucan_daily.Full.1 (1135951397) for reading and hit 
return when done
)   = 0
futex(0x80d5a18, FUTEX_WAKE, 1)         = 0
gettimeofday({1136899569, 651395}, NULL) = 0
time(NULL)                              = 1136899569
gettimeofday({1136899569, 651505}, NULL) = 0
sendmsg(5, {msg_name(16)={sa_family=AF_INET, sin_port=htons(33549), 
sin_addr=inet_addr("152.2.128.3")}, 
msg_iov(2)=[{"\231\4dd\17*\361\320\0\0\0\2\0\0\0\1\0\0\0\2\1\4\0\0\0"..., 
28}, {"\0\0\0R\0\0\0e\0\0\0s\0\0\0t\0\0\0o\0\0\0r\0\0\0e\0\0\0"..., 
536}], msg_controllen=0, msg_flags=0}, 0) = 564
time(NULL)                              = 1136899569
futex(0x86d4760, FUTEX_WAIT, 3, NULL
Thanks, now proceeding with tape reading operation.
**********************************
Proceeding with tape operation
)   = 0
futex(0x80d5a18, FUTEX_WAKE, 1)         = 0
gettimeofday({1136899579, 671554}, NULL) = 0
time(NULL)                              = 1136899579
gettimeofday({1136899579, 671658}, NULL) = 0
sendmsg(5, {msg_name(16)={sa_family=AF_INET, sin_port=htons(33549), 
sin_addr=inet_addr("152.2.128.3")}, 
msg_iov(2)=[{"\231\4dd\17*\361\330\0\0\0\2\0\0\0\1\0\0\0\2\1\4\0\0\0"..., 
28}, {"\0\0\0R\0\0\0e\0\0\0s\0\0\0t\0\0\0o\0\0\0r\0\0\0e\0\0\0"..., 
536}], msg_controllen=0, msg_flags=0}, 0) = 564
time(NULL)                              = 1136899579
futex(0x86d4760, FUTEX_WAIT, 5, NULL)   = 0
futex(0x80d5a18, FUTEX_WAKE, 1)         = 0
gettimeofday({1136899589, 691607}, NULL) = 0
time(NULL)                              = 1136899589
gettimeofday({1136899589, 691707}, NULL) = 0
sendmsg(5, {msg_name(16)={sa_family=AF_INET, sin_port=htons(33549), 
sin_addr=inet_addr("152.2.128.3")}, 
msg_iov(2)=[{"\231\4dd\17*\361\340\0\0\0\2\0\0\0\1\0\0\0\2\1\4\0\0\0"..., 
28}, {"\0\0\0R\0\0\0e\0\0\0s\0\0\0t\0\0\0o\0\0\0r\0\0\0e\0\0\0"..., 
536}], msg_controllen=0, msg_flags=0}, 0) = 564
time(NULL)                              = 1136899589
futex(0x86d4760, FUTEX_WAIT, 7, NULL)   = 0
futex(0x80d5a18, FUTEX_WAKE, 1)         = 0
gettimeofday({1136899599, 711693}, NULL) = 0
time(NULL)                              = 1136899599
gettimeofday({1136899599, 711796}, NULL) = 0
sendmsg(5, {msg_name(16)={sa_family=AF_INET, sin_port=htons(33549), 
sin_addr=inet_addr("152.2.128.3")}, 
msg_iov(2)=[{"\231\4dd\17*\361\350\0\0\0\2\0\0\0\1\0\0\0\2\1\4\0\0\0"..., 
28}, {"\0\0\0R\0\0\0e\0\0\0s\0\0\0t\0\0\0o\0\0\0r\0\0\0e\0\0\0"..., 
536}], msg_controllen=0, msg_flags=0}, 0) = 564
time(NULL)                              = 1136899599
futex(0x86d4760, FUTEX_WAIT, 9, NULL)   = 0
futex(0x80d5a18, FUTEX_WAKE, 1)         = 0
gettimeofday({1136899609, 731761}, NULL) = 0
time(NULL)                              = 1136899609
gettimeofday({1136899609, 731868}, NULL) = 0
sendmsg(5, {msg_name(16)={sa_family=AF_INET, sin_port=htons(33549), 
sin_addr=inet_addr("152.2.128.3")}, 
msg_iov(2)=[{"\231\4dd\17*\361\360\0\0\0\2\0\0\0\1\0\0\0\2\1\4\0\0\0"..., 
28}, {"\0\0\0R\0\0\0e\0\0\0s\0\0\0t\0\0\0o\0\0\0r\0\0\0e\0\0\0"..., 
536}], msg_controllen=0, msg_flags=0}, 0) = 564
time(NULL)                              = 1136899609
futex(0x86d4760, FUTEX_WAIT, 11, NULL)  = 0
futex(0x80d5a18, FUTEX_WAKE, 1)         = 0
gettimeofday({1136899619, 751916}, NULL) = 0
time(NULL)                              = 1136899619
gettimeofday({1136899619, 752018}, NULL) = 0
sendmsg(5, {msg_name(16)={sa_family=AF_INET, sin_port=htons(33549), 
sin_addr=inet_addr("152.2.128.3")}, 
msg_iov(2)=[{"\231\4dd\17*\361\370\0\0\0\2\0\0\0\1\0\0\0\2\1\4\0\0\0"..., 
28}, {"\0\0\0R\0\0\0e\0\0\0s\0\0\0t\0\0\0o\0\0\0r\0\0\0e\0\0\0"..., 
536}], msg_controllen=0, msg_flags=0}, 0) = 564
time(NULL)                              = 1136899619
futex(0x86d4760, FUTEX_WAIT, 13, NULL)  = 0
futex(0x80d5a18, FUTEX_WAKE, 1)         = 0
gettimeofday({1136899629, 771959}, NULL) = 0
time(NULL)                              = 1136899629
gettimeofday({1136899629, 772061}, NULL) = 0
sendmsg(5, {msg_name(16)={sa_family=AF_INET, sin_port=htons(33549), 
sin_addr=inet_addr("152.2.128.3")}, 
msg_iov(2)=[{"\231\4dd\17*\362\0\0\0\0\2\0\0\0\1\0\0\0\2\1\4\0\0\0\0"..., 
28}, {"\0\0\0R\0\0\0e\0\0\0s\0\0\0t\0\0\0o\0\0\0r\0\0\0e\0\0\0"..., 
536}], msg_controllen=0, msg_flags=0}, 0) = 564
time(NULL)                              = 1136899629
futex(0x86d4760, FUTEX_WAIT, 15, NULL)  = -1 EINTR (Interrupted system call)
toucan/root [/usr/afs/backup] #


-- 
John W. Sopko Jr.               University of North Carolina
email: sopko AT cs.unc.edu      Computer Science Dept., CB 3175
Phone: 919-962-1844             Sitterson Hall; Room 044
Fax:   919-962-1799             Chapel Hill, NC 27599-3175