1. Kill the hot relocation daemon.
Example:
kill -9 25233 >/dev/null 2>&1
kill -9 25235 >/dev/null 2>&1
2. Create 2 volumes:
Mirrored:
/usr/sbin/vxassist -g testdg make test1 102400 layout=mirror testdg01 testdg02
The test1 volume was created successfully.
Concatenated:
/usr/sbin/vxassist -g testdg make test2 102400 layout=concat testdg02
The test2 volume was created successfully.
3. Create file systems on those volumes:
/sbin/mkfs -t vxfs /dev/vx/rdsk/testdg/test1
version 9 layout
102400 sectors, 51200 blocks of size 1024, log size 1024 blocks
rcq size 1024 blocks
largefiles supported
/sbin/mkfs -t vxfs /dev/vx/rdsk/testdg/test2
version 9 layout
102400 sectors, 51200 blocks of size 1024, log size 1024 blocks
rcq size 1024 blocks
largefiles supported
4. Mount the file systems:
/bin/mount -t vxfs /dev/vx/dsk/testdg/test1 /test1
The test1 volume is mounted to /test1.
/bin/mount -t vxfs /dev/vx/dsk/testdg/test2 /test2
The test2 volume is mounted to /test2.
5. Copy some files to those file systems:
/bin/cp /etc/default/* /test1
/bin/cp /etc/default/* /test2
sleep 15
6. Now suppose a disk has failed permanently in the testdg disk group, and we have to troubleshoot and repair the failure. Because the failure is
permanent, a different disk is used as the replacement.
7. Check the Status of filesystems:
[root@server1]# df -k
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/sda1 21225712 4711468 15418616 24% /
tmpfs 1029756 0 1029756 0% /dev/shm
tmpfs 4 0 4 0% /dev/vx
mgt:/student 21225728 6176640 13953472 31% /student
/dev/vx/dsk/testdg/test1
51200 3177 45028 7% /test1
df: `/test2': Input/output error
8. Check the disk status in DGs:
[root@server1]# vxdisk -o alldgs list
DEVICE TYPE DISK GROUP STATUS
emc0_dd1 auto:cdsdisk testdg01 testdg online
emc0_dd2 auto:cdsdisk - - online invalid
emc0_dd3 auto:cdsdisk testdg03 testdg online
emc0_dd4 auto:cdsdisk - - online
emc0_dd5 auto:none - - online invalid
emc0_dd6 auto:none - - online invalid
emc0_dd7 auto:none - - online invalid
emc0_dd8 auto:none - - online invalid
emc0_dd9 auto:none - - online invalid
emc0_d10 auto:none - - online invalid
emc0_d11 auto:none - - online invalid
emc0_d12 auto:none - - online invalid
sda auto:none - - online invalid
sdb auto:none - - online invalid
- - testdg02 testdg failed was:emc0_dd2
9. Check the status of volumes:
* Both volumes are affected because the failed disk was part of both of them. However,
one plex of the mirrored volume test1 is still enabled, so the data in this volume remains accessible.
The other volume, test2, is a concatenated volume with no redundancy, so its data is not accessible.
[root@server1]# vxprint -g testdg -htu h
DG NAME NCONFIG NLOG MINORS GROUP-ID
ST NAME STATE DM_CNT SPARE_CNT APPVOL_CNT
DM NAME DEVICE TYPE PRIVLEN PUBLEN STATE
RV NAME RLINK_CNT KSTATE STATE PRIMARY DATAVOLS SRL
RL NAME RVG KSTATE STATE REM_HOST REM_DG REM_RLNK
CO NAME CACHEVOL KSTATE STATE
VT NAME RVG KSTATE STATE NVOLUME
V NAME RVG/VSET/CO KSTATE STATE LENGTH READPOL PREFPLEX UTYPE
PL NAME VOLUME KSTATE STATE LENGTH LAYOUT NCOL/WID MODE
SD NAME PLEX DISK DISKOFFS LENGTH [COL/]OFF DEVICE MODE
SV NAME PLEX VOLNAME NVOLLAYR LENGTH [COL/]OFF AM/NM MODE
SC NAME PLEX CACHE DISKOFFS LENGTH [COL/]OFF DEVICE MODE
DC NAME PARENTVOL LOGVOL
SP NAME SNAPVOL DCO
EX NAME ASSOC VC PERMS MODE STATE
SR NAME KSTATE
dg testdg default default 1000 1422525023.21.sym1
dm testdg01 emc0_dd1 auto 32.00m 1.96g -
dm testdg02 - - - - NODEVICE
dm testdg03 emc0_dd3 auto 32.00m 1.96g -
v test1 - ENABLED ACTIVE 50.00m SELECT - fsgen
pl test1-01 test1 ENABLED ACTIVE 50.00m CONCAT - RW
sd testdg01-01 test1-01 testdg01 0.00 50.00m 0.00 emc0_dd1 ENA
pl test1-02 test1 DISABLED NODEVICE 50.00m CONCAT - RW
sd testdg02-01 test1-02 testdg02 0.00 50.00m 0.00 - NDEV
v test2 - DISABLED ACTIVE 50.00m SELECT - fsgen
pl test2-01 test2 DISABLED NODEVICE 50.00m CONCAT - RW
sd testdg02-02 test2-01 testdg02 50.00m 50.00m 0.00 - NDEV
10. The non-mirrored volume shows an I/O error in the df -k command output:
[root@server1]# df -k
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/sda1 21225712 4711516 15418568 24% /
tmpfs 1029756 0 1029756 0% /dev/shm
tmpfs 4 0 4 0% /dev/vx
mgt:/student 21225728 6176640 13953472 31% /student
/dev/vx/dsk/testdg/test1
51200 3177 45028 7% /test1
df: `/test2': Input/output error
[root@server1]# ls /test1
lost+found nss sfm_resolv.conf useradd
[root@server1]# ls /test2
ls: /test2: Input/output error
11. Prepare a new disk for the failed disk's replacement as follows:
Initialize the new disk:-
[root@server1]# vxdisksetup -i emc0_dd4
Add the disk to the DG using the same disk media name as the failed disk:-
[root@server1]# vxdg -g testdg adddisk testdg02=emc0_dd4
VxVM vxdg ERROR V-5-1-559 Disk testdg02: Name is already used
[root@server1]# vxdg -g testdg -k adddisk testdg02=emc0_dd4 <----- -k option is required.
12. Verify the new disk addition to the DG:
[root@server1]# vxdisk -o alldgs list
DEVICE TYPE DISK GROUP STATUS
emc0_dd1 auto:cdsdisk testdg01 testdg online
emc0_dd2 auto:cdsdisk - - online invalid
emc0_dd3 auto:cdsdisk testdg03 testdg online
emc0_dd4 auto:cdsdisk testdg02 testdg online <----- Disk is added and online
emc0_dd5 auto:none - - online invalid
emc0_dd6 auto:none - - online invalid
emc0_dd7 auto:none - - online invalid
emc0_dd8 auto:none - - online invalid
emc0_dd9 auto:none - - online invalid
emc0_d10 auto:none - - online invalid
emc0_d11 auto:none - - online invalid
emc0_d12 auto:none - - online invalid
sda auto:none - - online invalid
sdb auto:none - - online invalid
13. After disk addition, we need to recover the volumes as follows:-
The 'vxrecover' program performs plex attach and it will resynchronize the mirrors.
[root@server1]# vxrecover
[root@server1]# vxprint -g testdg -htu h
DG NAME NCONFIG NLOG MINORS GROUP-ID
ST NAME STATE DM_CNT SPARE_CNT APPVOL_CNT
DM NAME DEVICE TYPE PRIVLEN PUBLEN STATE
RV NAME RLINK_CNT KSTATE STATE PRIMARY DATAVOLS SRL
RL NAME RVG KSTATE STATE REM_HOST REM_DG REM_RLNK
CO NAME CACHEVOL KSTATE STATE
VT NAME RVG KSTATE STATE NVOLUME
V NAME RVG/VSET/CO KSTATE STATE LENGTH READPOL PREFPLEX UTYPE
PL NAME VOLUME KSTATE STATE LENGTH LAYOUT NCOL/WID MODE
SD NAME PLEX DISK DISKOFFS LENGTH [COL/]OFF DEVICE MODE
SV NAME PLEX VOLNAME NVOLLAYR LENGTH [COL/]OFF AM/NM MODE
SC NAME PLEX CACHE DISKOFFS LENGTH [COL/]OFF DEVICE MODE
DC NAME PARENTVOL LOGVOL
SP NAME SNAPVOL DCO
EX NAME ASSOC VC PERMS MODE STATE
SR NAME KSTATE
dg testdg default default 1000 1422525023.21.sym1
dm testdg01 emc0_dd1 auto 32.00m 1.96g -
dm testdg02 emc0_dd4 auto 32.00m 1.96g -
dm testdg03 emc0_dd3 auto 32.00m 1.96g -
v test1 - ENABLED ACTIVE 50.00m SELECT - fsgen
pl test1-01 test1 ENABLED ACTIVE 50.00m CONCAT - RW
sd testdg01-01 test1-01 testdg01 0.00 50.00m 0.00 emc0_dd1 ENA
pl test1-02 test1 ENABLED ACTIVE 50.00m CONCAT - RW <----- This plex is also enabled now.
sd testdg02-01 test1-02 testdg02 0.00 50.00m 0.00 emc0_dd4 ENA
v test2 - DISABLED ACTIVE 50.00m SELECT - fsgen
pl test2-01 test2 DISABLED IOFAIL 50.00m CONCAT - RW <----- This plex is still disabled.
sd testdg02-02 test2-01 testdg02 50.00m 50.00m 0.00 emc0_dd4 ENA
* Note: More details about vxrecover are given at the end, in the section "Summary of vxrecover".
14. The vxrecover command will not succeed in recovering a non-redundant volume.
In this case, you may need to start the non-redundant volume forcibly using the "vxvol -f start" command as follows:
* Note: Starting a volume is necessary before you can perform any I/O on the volume, for example to restore data from a backup.
[root@server1]# vxvol -g testdg -f start test2
[root@server1]# vxprint -g testdg -htu h
DG NAME NCONFIG NLOG MINORS GROUP-ID
ST NAME STATE DM_CNT SPARE_CNT APPVOL_CNT
DM NAME DEVICE TYPE PRIVLEN PUBLEN STATE
RV NAME RLINK_CNT KSTATE STATE PRIMARY DATAVOLS SRL
RL NAME RVG KSTATE STATE REM_HOST REM_DG REM_RLNK
CO NAME CACHEVOL KSTATE STATE
VT NAME RVG KSTATE STATE NVOLUME
V NAME RVG/VSET/CO KSTATE STATE LENGTH READPOL PREFPLEX UTYPE
PL NAME VOLUME KSTATE STATE LENGTH LAYOUT NCOL/WID MODE
SD NAME PLEX DISK DISKOFFS LENGTH [COL/]OFF DEVICE MODE
SV NAME PLEX VOLNAME NVOLLAYR LENGTH [COL/]OFF AM/NM MODE
SC NAME PLEX CACHE DISKOFFS LENGTH [COL/]OFF DEVICE MODE
DC NAME PARENTVOL LOGVOL
SP NAME SNAPVOL DCO
EX NAME ASSOC VC PERMS MODE STATE
SR NAME KSTATE
dg testdg default default 1000 1422525023.21.sym1
dm testdg01 emc0_dd1 auto 32.00m 1.96g -
dm testdg02 emc0_dd4 auto 32.00m 1.96g -
dm testdg03 emc0_dd3 auto 32.00m 1.96g -
v test1 - ENABLED ACTIVE 50.00m SELECT - fsgen
pl test1-01 test1 ENABLED ACTIVE 50.00m CONCAT - RW
sd testdg01-01 test1-01 testdg01 0.00 50.00m 0.00 emc0_dd1 ENA
pl test1-02 test1 ENABLED ACTIVE 50.00m CONCAT - RW
sd testdg02-01 test1-02 testdg02 0.00 50.00m 0.00 emc0_dd4 ENA
v test2 - ENABLED ACTIVE 50.00m SELECT - fsgen
pl test2-01 test2 ENABLED ACTIVE 50.00m CONCAT - RW <----- This volume is also enabled now.
sd testdg02-02 test2-01 testdg02 50.00m 50.00m 0.00 emc0_dd4 ENA
15. Check the df -k output now:- // It still shows I/O error.
[root@server1]# df -k
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/sda1 21225712 4711628 15418456 24% /
tmpfs 1029756 0 1029756 0% /dev/shm
tmpfs 4 0 4 0% /dev/vx
mgt:/student 21225728 6176640 13953472 31% /student
/dev/vx/dsk/testdg/test1
51200 3177 45028 7% /test1
df: `/test2': Input/output error
16. Since the test2 volume resided on a single disk, which has failed, its data is lost and must be restored from backup.
Before doing that, unmount the file system, create a new file system on the volume, and mount it again.
[root@server1]# umount /test2
[root@server1]# mkfs -t vxfs /dev/vx/rdsk/testdg/test2
version 9 layout
102400 sectors, 51200 blocks of size 1024, log size 1024 blocks
rcq size 1024 blocks
largefiles supported
[root@server1]# mount -t vxfs /dev/vx/dsk/testdg/test2 /test2
[root@server1]# df -k
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/sda1 21225712 4711624 15418460 24% /
tmpfs 1029756 0 1029756 0% /dev/shm
tmpfs 4 0 4 0% /dev/vx
mgt:/student 21225728 6176640 13953472 31% /student
/dev/vx/dsk/testdg/test1
51200 3177 45028 7% /test1
/dev/vx/dsk/testdg/test2
51200 3173 45033 7% /test2
[root@server1]# ls /test1
lost+found nss sfm_resolv.conf useradd
[root@server1]# ls /test2
lost+found
*Note: After this, data can be copied from backup.
No comments:
Post a Comment