Friday, March 6, 2015

Recovering from temporary disk failure with vxattachd


Check whether the vxattachd daemon is running. If it is not running, start it with "vxattachd &".
================================================================
[root@server1]# ps -ef | grep vxattachd
root      7347     1  0 01:10 ?        00:00:00 /bin/sh - /usr/lib/vxvm/bin/vxattachd root
root      7400  7347  0 01:10 ?        00:00:00 /bin/sh - /usr/lib/vxvm/bin/vxattachd root
root     24725  9119  0 04:04 pts/1    00:00:00 grep vxattachd


Create a mirrored volume:
========================
** This example assumes that the DG has already been created and 4 disks are added to it.
[root@server1]# vxassist -g appdg make appvol 100m layout=mirror
[root@server1]#


Check the volume details:
========================

[root@server1]# vxprint -g appdg -htu h
DG NAME         NCONFIG      NLOG     MINORS   GROUP-ID
ST NAME         STATE        DM_CNT   SPARE_CNT         APPVOL_CNT
DM NAME         DEVICE       TYPE     PRIVLEN  PUBLEN   STATE
RV NAME         RLINK_CNT    KSTATE   STATE    PRIMARY  DATAVOLS  SRL
RL NAME         RVG          KSTATE   STATE    REM_HOST REM_DG    REM_RLNK
CO NAME         CACHEVOL     KSTATE   STATE
VT NAME         RVG          KSTATE   STATE    NVOLUME
V  NAME         RVG/VSET/CO  KSTATE   STATE    LENGTH   READPOL   PREFPLEX UTYPE
PL NAME         VOLUME       KSTATE   STATE    LENGTH   LAYOUT    NCOL/WID MODE
SD NAME         PLEX         DISK     DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
SV NAME         PLEX         VOLNAME  NVOLLAYR LENGTH   [COL/]OFF AM/NM    MODE
SC NAME         PLEX         CACHE    DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
DC NAME         PARENTVOL    LOGVOL
SP NAME         SNAPVOL      DCO
EX NAME         ASSOC        VC                       PERMS    MODE     STATE
SR NAME         KSTATE
dg appdg        default      default  1000     1422525023.21.sym1
dm appdg01      emc0_dd1     auto     32.00m   1.96g    -
dm appdg02      emc0_dd2     auto     32.00m   1.96g    -
dm appdg03      emc0_dd3     auto     32.00m   1.96g    -
dm appdg04      emc0_dd4     auto     32.00m   1.96g    -
v  appvol       -            ENABLED  ACTIVE   100.00m  SELECT    -        fsgen
pl appvol-01    appvol       ENABLED  ACTIVE   100.00m  CONCAT    -        RW
sd appdg01-01   appvol-01    appdg01  0.00     100.00m  0.00      emc0_dd1 ENA
pl appvol-02    appvol       ENABLED  ACTIVE   100.00m  CONCAT    -        RW
sd appdg02-01   appvol-02    appdg02  0.00     100.00m  0.00      emc0_dd2 ENA


Create a filesystem:-
===================
[root@server1]# mkfs -t vxfs /dev/vx/rdsk/appdg/appvol
    version 9 layout
    204800 sectors, 102400 blocks of size 1024, log size 1024 blocks
    rcq size 1024 blocks
    largefiles supported

Mount the filesystem:-
=====================
[root@server1]# mount -t vxfs /dev/vx/dsk/appdg/appvol /app
[root@server1]#
[root@server1]#
[root@server1]#
[root@server1]#
[root@server1]#
[root@server1]# df -k
Filesystem           1K-blocks      Used Available Use% Mounted on
/dev/sda1             21225712   4738764  15391320  24% /
tmpfs                  1029756         0   1029756   0% /dev/shm
tmpfs                        4         0         4   0% /dev/vx
/dev/vx/dsk/appdg/appvol
                        102400      3189     93018   4% /app <----- Mounted


Find out the paths of the first device used by the volume:-
=========================================================

[root@server1]# vxdisk list emc0_dd1
Device:    emc0_dd1
devicetag: emc0_dd1
type:      auto
hostid:    sym1
disk:      name=appdg01 id=1422446109.24.sym1
group:     name=appdg id=1422525023.21.sym1
info:      format=cdsdisk,privoffset=256,pubslice=3,privslice=3
flags:     online ready private autoconfig autoimport imported
pubpaths:  block=/dev/vx/dmp/emc0_dd1s3 char=/dev/vx/rdmp/emc0_dd1s3
guid:      {820742fe-a6e4-11e4-b32c-096052716248}
udid:      EMC%5FSYMMETRIX%5F313635323300%5FDD0DD1
site:      -
version:   3.1
iosize:    min=512 (bytes) max=1024 (blocks)
public:    slice=3 offset=65792 len=4120320 disk_offset=0
private:   slice=3 offset=256 len=65536 disk_offset=0
update:    time=1422532581 seqno=0.56
ssb:       actual_seqno=0.0
headers:   0 240
configs:   count=1 len=51360
logs:      count=1 len=4096
Defined regions:
 config   priv 000048-000239[000192]: copy=01 offset=000000 enabled
 config   priv 000256-051423[051168]: copy=01 offset=000192 enabled
 log      priv 051424-055519[004096]: copy=01 offset=000000 enabled
 lockrgn  priv 055520-055663[000144]: part=00 offset=000000
Multipathing information:
numpaths:   2
sdc             state=enabled <----- Path1
sdd             state=enabled <----- Path2


Disable all the paths of the device emc0_dd1:-
=============================================

[root@server1]# vxdmpadm -f disable path=sdc,sdd

[root@server1]# vxdisk list emc0_dd1
Device:    emc0_dd1
devicetag: emc0_dd1
type:      auto
hostid:    sym1
disk:      name=appdg01 id=1422446109.24.sym1
group:     name=appdg id=1422525023.21.sym1
info:      format=cdsdisk,privoffset=256,pubslice=3,privslice=3
flags:     online ready private autoconfig autoimport imported
pubpaths:  block=/dev/vx/dmp/emc0_dd1s3 char=/dev/vx/rdmp/emc0_dd1s3
guid:      {820742fe-a6e4-11e4-b32c-096052716248}
udid:      EMC%5FSYMMETRIX%5F313635323300%5FDD0DD1
site:      -
version:   3.1
iosize:    min=512 (bytes) max=1024 (blocks)
public:    slice=3 offset=65792 len=4120320 disk_offset=0
private:   slice=3 offset=256 len=65536 disk_offset=0
update:    time=1422532581 seqno=0.56
ssb:       actual_seqno=0.0
headers:   0 240
configs:   count=1 len=51360
logs:      count=1 len=4096
Defined regions:
 config   priv 000048-000239[000192]: copy=01 offset=000000 enabled
 config   priv 000256-051423[051168]: copy=01 offset=000192 enabled
 log      priv 051424-055519[004096]: copy=01 offset=000000 enabled
 lockrgn  priv 055520-055663[000144]: part=00 offset=000000
Multipathing information:
numpaths:   2
sdc             state=disabled <-----
sdd             state=disabled <-----


Although both paths to the device have been disabled, checking the volume status immediately afterwards does not yet show the plex as disabled.
================================================================


[root@server1]# vxprint -g appdg -htu h
DG NAME         NCONFIG      NLOG     MINORS   GROUP-ID
ST NAME         STATE        DM_CNT   SPARE_CNT         APPVOL_CNT
DM NAME         DEVICE       TYPE     PRIVLEN  PUBLEN   STATE
RV NAME         RLINK_CNT    KSTATE   STATE    PRIMARY  DATAVOLS  SRL
RL NAME         RVG          KSTATE   STATE    REM_HOST REM_DG    REM_RLNK
CO NAME         CACHEVOL     KSTATE   STATE
VT NAME         RVG          KSTATE   STATE    NVOLUME
V  NAME         RVG/VSET/CO  KSTATE   STATE    LENGTH   READPOL   PREFPLEX UTYPE
PL NAME         VOLUME       KSTATE   STATE    LENGTH   LAYOUT    NCOL/WID MODE
SD NAME         PLEX         DISK     DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
SV NAME         PLEX         VOLNAME  NVOLLAYR LENGTH   [COL/]OFF AM/NM    MODE
SC NAME         PLEX         CACHE    DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
DC NAME         PARENTVOL    LOGVOL
SP NAME         SNAPVOL      DCO
EX NAME         ASSOC        VC                       PERMS    MODE     STATE
SR NAME         KSTATE
dg appdg        default      default  1000     1422525023.21.sym1
dm appdg01      emc0_dd1     auto     32.00m   1.96g    -
dm appdg02      emc0_dd2     auto     32.00m   1.96g    -
dm appdg03      emc0_dd3     auto     32.00m   1.96g    -
dm appdg04      emc0_dd4     auto     32.00m   1.96g    -
v  appvol       -            ENABLED  ACTIVE   100.00m  SELECT    -        fsgen
pl appvol-01    appvol       ENABLED  ACTIVE   100.00m  CONCAT    -        RW
sd appdg01-01   appvol-01    appdg01  0.00     100.00m  0.00      emc0_dd1 ENA
pl appvol-02    appvol       ENABLED  ACTIVE   100.00m  CONCAT    -        RW
sd appdg02-01   appvol-02    appdg02  0.00     100.00m  0.00      emc0_dd2 ENA

 
The same happens with the disk details: checking them immediately afterwards does not yet show the disk as failed.

================================================================

[root@server1]# vxdisk -o alldgs list
DEVICE       TYPE            DISK         GROUP        STATUS
emc0_dd1     auto:cdsdisk    appdg01      appdg        online
emc0_dd2     auto:cdsdisk    appdg02      appdg        online
emc0_dd3     auto:cdsdisk    appdg03      appdg        online
emc0_dd4     auto:cdsdisk    appdg04      appdg        online
emc0_dd5     auto:cdsdisk    -            -            online
emc0_dd6     auto:none       -            -            online invalid
emc0_dd7     auto:none       -            -            online invalid
emc0_dd8     auto:none       -            -            online invalid
emc0_dd9     auto:none       -            -            online invalid
emc0_d10     auto:none       -            -            online invalid
emc0_d11     auto:none       -            -            online invalid
emc0_d12     auto:none       -            -            online invalid
sda          auto:none       -            -            online invalid
sdb          auto:none       -            -            online invalid


[root@server1]# df -k
Filesystem           1K-blocks      Used Available Use% Mounted on
/dev/sda1             21225712   4739072  15391012  24% /
tmpfs                  1029756         0   1029756   0% /dev/shm
tmpfs                        4         0         4   0% /dev/vx
mgt:/student          21225728   6176640  13953440  31% /student
/dev/vx/dsk/appdg/appvol
                        102400      3189     93018   4% /app
[root@server1]#
[root@server1]#
[root@server1]#


The failure is not detected until I/O is issued to the volume, so now use the dd command to write to the file system and trigger the failure.
=============================================================

[root@server1]# dd if=/dev/zero of=/app/test1 bs=1 count=10
10+0 records in
10+0 records out
10 bytes (10 B) copied, 8.2e-05 seconds, 122 kB/s

Check the volume status again:-
=============================
[root@server1]# vxprint -g appdg -htu h
DG NAME         NCONFIG      NLOG     MINORS   GROUP-ID
ST NAME         STATE        DM_CNT   SPARE_CNT         APPVOL_CNT
DM NAME         DEVICE       TYPE     PRIVLEN  PUBLEN   STATE
RV NAME         RLINK_CNT    KSTATE   STATE    PRIMARY  DATAVOLS  SRL
RL NAME         RVG          KSTATE   STATE    REM_HOST REM_DG    REM_RLNK
CO NAME         CACHEVOL     KSTATE   STATE
VT NAME         RVG          KSTATE   STATE    NVOLUME
V  NAME         RVG/VSET/CO  KSTATE   STATE    LENGTH   READPOL   PREFPLEX UTYPE
PL NAME         VOLUME       KSTATE   STATE    LENGTH   LAYOUT    NCOL/WID MODE
SD NAME         PLEX         DISK     DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
SV NAME         PLEX         VOLNAME  NVOLLAYR LENGTH   [COL/]OFF AM/NM    MODE
SC NAME         PLEX         CACHE    DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
DC NAME         PARENTVOL    LOGVOL
SP NAME         SNAPVOL      DCO
EX NAME         ASSOC        VC                       PERMS    MODE     STATE
SR NAME         KSTATE
dg appdg        default      default  1000     1422525023.21.sym1
dm appdg01      -            -        -        -        NODEVICE
dm appdg02      emc0_dd2     auto     32.00m   1.96g    -
dm appdg03      emc0_dd3     auto     32.00m   1.96g    -
dm appdg04      emc0_dd4     auto     32.00m   1.96g    -
v  appvol       -            ENABLED  ACTIVE   100.00m  SELECT    -        fsgen
pl appvol-01    appvol       DISABLED NODEVICE 100.00m  CONCAT    -        RW  <----- It shows that this plex is disabled.
sd appdg01-01   appvol-01    appdg01  0.00     100.00m  0.00      -        RLOC
pl appvol-02    appvol       ENABLED  ACTIVE   100.00m  CONCAT    -        RW
sd appdg02-01   appvol-02    appdg02  0.00     100.00m  0.00      emc0_dd2 ENA

Check the disk status again; it now shows that the disk has failed:-
==========================================================

[root@server1]# vxdisk -o alldgs list
DEVICE       TYPE            DISK         GROUP        STATUS
emc0_dd1     auto            -            -            error
emc0_dd2     auto:cdsdisk    appdg02      appdg        online
emc0_dd3     auto:cdsdisk    appdg03      appdg        online
emc0_dd4     auto:cdsdisk    appdg04      appdg        online
emc0_dd5     auto:cdsdisk    -            -            online
emc0_dd6     auto:none       -            -            online invalid
emc0_dd7     auto:none       -            -            online invalid
emc0_dd8     auto:none       -            -            online invalid
emc0_dd9     auto:none       -            -            online invalid
emc0_d10     auto:none       -            -            online invalid
emc0_d11     auto:none       -            -            online invalid
emc0_d12     auto:none       -            -            online invalid
sda          auto:none       -            -            online invalid
sdb          auto:none       -            -            online invalid
-            -         appdg01      appdg        failed was:emc0_dd1
[root@server1]#
[root@server1]#



Now enable all paths to the failed device:-
=========================================

[root@server1]# vxdmpadm enable path=sdc,sdd

Verify that the paths are enabled:-
==================================

[root@server1]# vxdisk list emc0_dd1
Device:    emc0_dd1
devicetag: emc0_dd1
type:      auto
hostid:    sym1
disk:      name= id=1422446109.24.sym1
group:     name=appdg id=1422525023.21.sym1
info:      format=cdsdisk,privoffset=256,pubslice=3,privslice=3
flags:     online ready private autoconfig autoimport
pubpaths:  block=/dev/vx/dmp/emc0_dd1s3 char=/dev/vx/rdmp/emc0_dd1s3
guid:      {820742fe-a6e4-11e4-b32c-096052716248}
udid:      EMC%5FSYMMETRIX%5F313635323300%5FDD0DD1
site:      -
version:   3.1
iosize:    min=512 (bytes) max=1024 (blocks)
public:    slice=3 offset=65792 len=4120320 disk_offset=0
private:   slice=3 offset=256 len=65536 disk_offset=0
update:    time=1422532581 seqno=0.56
ssb:       actual_seqno=0.0
headers:   0 240
configs:   count=1 len=51360
logs:      count=1 len=4096
Defined regions:
 config   priv 000048-000239[000192]: copy=01 offset=000000 enabled
 config   priv 000256-051423[051168]: copy=01 offset=000192 enabled
 log      priv 051424-055519[004096]: copy=01 offset=000000 enabled
 lockrgn  priv 055520-055663[000144]: part=00 offset=000000
Multipathing information:
numpaths:   2
sdc             state=enabled <----- It shows paths are enabled.
sdd             state=enabled

Check the disk and volume status now. The disk may still be shown as failed and the affected plex as
disabled, because the vxattachd daemon can take some time to sense that the device is back online.
Once it does, it reattaches the device and recovers the failed plexes.
======================================

[root@server1]# vxdisk -o alldgs list
DEVICE       TYPE            DISK         GROUP        STATUS
emc0_dd1     auto:cdsdisk    -            (appdg)      online
emc0_dd2     auto:cdsdisk    appdg02      appdg        online
emc0_dd3     auto:cdsdisk    appdg03      appdg        online
emc0_dd4     auto:cdsdisk    appdg04      appdg        online
emc0_dd5     auto:cdsdisk    -            -            online
emc0_dd6     auto:none       -            -            online invalid
emc0_dd7     auto:none       -            -            online invalid
emc0_dd8     auto:none       -            -            online invalid
emc0_dd9     auto:none       -            -            online invalid
emc0_d10     auto:none       -            -            online invalid
emc0_d11     auto:none       -            -            online invalid
emc0_d12     auto:none       -            -            online invalid
sda          auto:none       -            -            online invalid
sdb          auto:none       -            -            online invalid
-            -         appdg01      appdg        failed was:emc0_dd1 <----- still disabled

[root@server1]# vxprint -g appdg -htu h
DG NAME         NCONFIG      NLOG     MINORS   GROUP-ID
ST NAME         STATE        DM_CNT   SPARE_CNT         APPVOL_CNT
DM NAME         DEVICE       TYPE     PRIVLEN  PUBLEN   STATE
RV NAME         RLINK_CNT    KSTATE   STATE    PRIMARY  DATAVOLS  SRL
RL NAME         RVG          KSTATE   STATE    REM_HOST REM_DG    REM_RLNK
CO NAME         CACHEVOL     KSTATE   STATE
VT NAME         RVG          KSTATE   STATE    NVOLUME
V  NAME         RVG/VSET/CO  KSTATE   STATE    LENGTH   READPOL   PREFPLEX UTYPE
PL NAME         VOLUME       KSTATE   STATE    LENGTH   LAYOUT    NCOL/WID MODE
SD NAME         PLEX         DISK     DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
SV NAME         PLEX         VOLNAME  NVOLLAYR LENGTH   [COL/]OFF AM/NM    MODE
SC NAME         PLEX         CACHE    DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
DC NAME         PARENTVOL    LOGVOL
SP NAME         SNAPVOL      DCO
EX NAME         ASSOC        VC                       PERMS    MODE     STATE
SR NAME         KSTATE
dg appdg        default      default  1000     1422525023.21.sym1
dm appdg01      -            -        -        -        NODEVICE
dm appdg02      emc0_dd2     auto     32.00m   1.96g    -
dm appdg03      emc0_dd3     auto     32.00m   1.96g    -
dm appdg04      emc0_dd4     auto     32.00m   1.96g    -
v  appvol       -            ENABLED  ACTIVE   100.00m  SELECT    -        fsgen
pl appvol-01    appvol       DISABLED NODEVICE 100.00m  CONCAT    -        RW  <----- still disabled
sd appdg01-01   appvol-01    appdg01  0.00     100.00m  0.00      -        RLOC
pl appvol-02    appvol       ENABLED  ACTIVE   100.00m  CONCAT    -        RW
sd appdg02-01   appvol-02    appdg02  0.00     100.00m  0.00      emc0_dd2 ENA

After re-running the commands a few times, the output shows that the device has come back online and the plex has been recovered.
This shows that vxattachd has done its job.
============================================================

[root@server1]# vxdisk -o alldgs list
DEVICE       TYPE            DISK         GROUP        STATUS
emc0_dd1     auto:cdsdisk    appdg01      appdg        online
emc0_dd2     auto:cdsdisk    appdg02      appdg        online
emc0_dd3     auto:cdsdisk    appdg03      appdg        online
emc0_dd4     auto:cdsdisk    appdg04      appdg        online
emc0_dd5     auto:cdsdisk    -            -            online
emc0_dd6     auto:none       -            -            online invalid
emc0_dd7     auto:none       -            -            online invalid
emc0_dd8     auto:none       -            -            online invalid
emc0_dd9     auto:none       -            -            online invalid
emc0_d10     auto:none       -            -            online invalid
emc0_d11     auto:none       -            -            online invalid
emc0_d12     auto:none       -            -            online invalid
sda          auto:none       -            -            online invalid
sdb          auto:none       -            -            online invalid

[root@server1]# vxprint -g appdg -htu h
DG NAME         NCONFIG      NLOG     MINORS   GROUP-ID
ST NAME         STATE        DM_CNT   SPARE_CNT         APPVOL_CNT
DM NAME         DEVICE       TYPE     PRIVLEN  PUBLEN   STATE
RV NAME         RLINK_CNT    KSTATE   STATE    PRIMARY  DATAVOLS  SRL
RL NAME         RVG          KSTATE   STATE    REM_HOST REM_DG    REM_RLNK
CO NAME         CACHEVOL     KSTATE   STATE
VT NAME         RVG          KSTATE   STATE    NVOLUME
V  NAME         RVG/VSET/CO  KSTATE   STATE    LENGTH   READPOL   PREFPLEX UTYPE
PL NAME         VOLUME       KSTATE   STATE    LENGTH   LAYOUT    NCOL/WID MODE
SD NAME         PLEX         DISK     DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
SV NAME         PLEX         VOLNAME  NVOLLAYR LENGTH   [COL/]OFF AM/NM    MODE
SC NAME         PLEX         CACHE    DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
DC NAME         PARENTVOL    LOGVOL
SP NAME         SNAPVOL      DCO
EX NAME         ASSOC        VC                       PERMS    MODE     STATE
SR NAME         KSTATE
dg appdg        default      default  1000     1422525023.21.sym1
dm appdg01      emc0_dd1     auto     32.00m   1.96g    -
dm appdg02      emc0_dd2     auto     32.00m   1.96g    -
dm appdg03      emc0_dd3     auto     32.00m   1.96g    -
dm appdg04      emc0_dd4     auto     32.00m   1.96g    -
v  appvol       -            ENABLED  ACTIVE   100.00m  SELECT    -        fsgen
pl appvol-01    appvol       ENABLED  ACTIVE   100.00m  CONCAT    -        RW
sd appdg01-01   appvol-01    appdg01  0.00     100.00m  0.00      emc0_dd1 ENA
pl appvol-02    appvol       ENABLED  ACTIVE   100.00m  CONCAT    -        RW
sd appdg02-01   appvol-02    appdg02  0.00     100.00m  0.00      emc0_dd2 ENA

No comments:

Post a Comment