very slow file deletion on an SSD

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi folks:

Just ran into this (see the output posted at the bottom). The setup is a 3.2.14 kernel, MD RAID 5, and an XFS file system. I'm not sure precisely where the problem is, hence posting to both lists.

 [root@siFlash ~]# cat /proc/mdstat
Personalities : [raid1] [raid6] [raid5] [raid4]
md22 : active raid5 sdl[0] sds[7] sdx[6] sdu[5] sdk[4] sdz[3] sdw[2] sdr[1]
      1641009216 blocks super 1.2 level 5, 32k chunk, algorithm 2 [8/8] [UUUUUUUU]

md20 : active raid5 sdh[0] sdf[7] sdm[6] sdd[5] sdc[4] sde[3] sdi[2] sdg[1]
      1641009216 blocks super 1.2 level 5, 32k chunk, algorithm 2 [8/8] [UUUUUUUU]

md21 : active raid5 sdy[0] sdq[7] sdp[6] sdo[5] sdn[4] sdj[3] sdv[2] sdt[1]
      1641009216 blocks super 1.2 level 5, 32k chunk, algorithm 2 [8/8] [UUUUUUUU]

md0 : active raid1 sdb1[1] sda1[0]
      93775800 blocks super 1.0 [2/2] [UU]
      bitmap: 1/1 pages [4KB], 65536KB chunk


md2* are SSD RAID5 arrays we are experimenting with. Xfs file systems atop them:

[root@siFlash ~]# mount | grep md2
/dev/md20 on /data/1 type xfs (rw)
/dev/md21 on /data/2 type xfs (rw)
/dev/md22 on /data/3 type xfs (rw)

Vanilla mount options (following Dave Chinner's long-standing advice). File system geometry for /dev/md20:

meta-data=/dev/md20              isize=2048   agcount=32, agsize=12820392 blks
         =                       sectsz=512   attr=2
data     =                       bsize=4096   blocks=410252304, imaxpct=5
         =                       sunit=8      swidth=56 blks
naming   =version 2              bsize=65536  ascii-ci=0
log      =internal               bsize=4096   blocks=30720, version=2
         =                       sectsz=512   sunit=8 blks, lazy-count=1
realtime =none                   extsz=4096   blocks=0, rtextents=0

[root@siFlash ~]# mdadm --detail /dev/md20
/dev/md20:
        Version : 1.2
  Creation Time : Sun Apr  1 19:36:39 2012
     Raid Level : raid5
     Array Size : 1641009216 (1564.99 GiB 1680.39 GB)
  Used Dev Size : 234429888 (223.57 GiB 240.06 GB)
   Raid Devices : 8
  Total Devices : 8
    Persistence : Superblock is persistent

    Update Time : Fri May 25 06:26:23 2012
          State : clean
 Active Devices : 8
Working Devices : 8
 Failed Devices : 0
  Spare Devices : 0

         Layout : left-symmetric
     Chunk Size : 32K

           Name : siFlash.sicluster:20
           UUID : 2f023323:6ec29eb9:a943de06:f6e0c25d
         Events : 296

    Number   Major   Minor   RaidDevice State
       0       8      112        0      active sync   /dev/sdh
       1       8       96        1      active sync   /dev/sdg
       2       8      128        2      active sync   /dev/sdi
       3       8       64        3      active sync   /dev/sde
       4       8       32        4      active sync   /dev/sdc
       5       8       48        5      active sync   /dev/sdd
       6       8      192        6      active sync   /dev/sdm
       7       8       80        7      active sync   /dev/sdf

All the SSDs are on the deadline scheduler:

[root@siFlash ~]# cat /sys/block/sd*/queue/scheduler | uniq
noop [deadline] cfq


All this said, deletes from this unit are taking 1-2 seconds per file ...

[root@siFlash ~]# strace -ttt -T rm -f /data/2/test/*
1337941514.040788 execve("/bin/rm", ["rm", "-f", "/data/2/test/2.8t-r.97.0", "/data/2/test/2.8t-r.98.0", "/data/2/test/2.8t-r.99.0", "/data/2/test/2.9.0"], [/* 40 vars */]) = 0 <0.000552>
1337941514.041713 brk(0)                = 0x60d000 <0.000031>
1337941514.041927 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7bc2779000 <0.000032>
1337941514.042113 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) <0.000109>
1337941514.042395 open("/etc/ld.so.cache", O_RDONLY) = 3 <0.000050>
1337941514.042614 fstat(3, {st_mode=S_IFREG|0644, st_size=81118, ...}) = 0 <0.000102>
1337941514.042928 mmap(NULL, 81118, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f7bc2765000 <0.000042>
1337941514.043078 close(3)              = 0 <0.000019>
1337941514.043235 open("/lib64/libc.so.6", O_RDONLY) = 3 <0.000115>
1337941514.043477 read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\360\355\301W4\0\0\0"..., 832) = 832 <0.000039>
1337941514.043647 fstat(3, {st_mode=S_IFREG|0755, st_size=1908792, ...}) = 0 <0.000020>
1337941514.043860 mmap(0x3457c00000, 3733672, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x3457c00000 <0.000085>
1337941514.044065 mprotect(0x3457d86000, 2097152, PROT_NONE) = 0 <0.000034>
1337941514.044191 mmap(0x3457f86000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x186000) = 0x3457f86000 <0.000034>
1337941514.044388 mmap(0x3457f8b000, 18600, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3457f8b000 <0.000085>
1337941514.044592 close(3)              = 0 <0.000058>
1337941514.044763 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7bc2764000 <0.000039>
1337941514.044893 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7bc2763000 <0.000020>
1337941514.044981 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7bc2762000 <0.000018>
1337941514.045076 arch_prctl(ARCH_SET_FS, 0x7f7bc2763700) = 0 <0.000018>
1337941514.045183 mprotect(0x3457f86000, 16384, PROT_READ) = 0 <0.000023>
1337941514.045270 mprotect(0x345761f000, 4096, PROT_READ) = 0 <0.000019>
1337941514.045350 munmap(0x7f7bc2765000, 81118) = 0 <0.000028>
1337941514.045619 brk(0)                = 0x60d000 <0.000017>
1337941514.045698 brk(0x62e000)         = 0x62e000 <0.000018>
1337941514.045803 open("/usr/lib/locale/locale-archive", O_RDONLY) = 3 <0.000028>
1337941514.045904 fstat(3, {st_mode=S_IFREG|0644, st_size=99158704, ...}) = 0 <0.000017>
1337941514.046012 mmap(NULL, 99158704, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f7bbc8d1000 <0.000020>
1337941514.046099 close(3)              = 0 <0.000017>
1337941514.046235 ioctl(0, SNDCTL_TMR_TIMEBASE or TCGETS, {B38400 opost isig icanon echo ...}) = 0 <0.000020>
1337941514.046373 newfstatat(AT_FDCWD, "/data/2/test/2.8t-r.97.0", {st_mode=S_IFREG|0600, st_size=1073741824, ...}, AT_SYMLINK_NOFOLLOW) = 0 <0.000024>
1337941514.046504 unlinkat(AT_FDCWD, "/data/2/test/2.8t-r.97.0", 0) = 0 <1.357571>
1337941515.404257 newfstatat(AT_FDCWD, "/data/2/test/2.8t-r.98.0", {st_mode=S_IFREG|0600, st_size=1073741824, ...}, AT_SYMLINK_NOFOLLOW) = 0 <0.000072>
1337941515.404485 unlinkat(AT_FDCWD, "/data/2/test/2.8t-r.98.0", 0) = 0 <1.608016>
1337941517.012706 newfstatat(AT_FDCWD, "/data/2/test/2.8t-r.99.0", {st_mode=S_IFREG|0600, st_size=1073741824, ...}, AT_SYMLINK_NOFOLLOW) = 0 <0.000082>
1337941517.012957 unlinkat(AT_FDCWD, "/data/2/test/2.8t-r.99.0", 0) = 0 <1.133890>
1337941518.146983 newfstatat(AT_FDCWD, "/data/2/test/2.9.0", {st_mode=S_IFREG|0600, st_size=8589934592, ...}, AT_SYMLINK_NOFOLLOW) = 0 <0.000023>
1337941518.147145 unlinkat(AT_FDCWD, "/data/2/test/2.9.0", 0) = 0 <0.938754>
1337941519.086125 close(0)              = 0 <0.000102>
1337941519.086357 close(1)              = 0 <0.000061>
1337941519.086540 close(2)              = 0 <0.000021>
1337941519.086694 exit_group(0)         = ?

Anything obvious that we are doing wrong?


Machine may be occupied for a bit. Might be a few days before we can get results back.


--
Joseph Landman, Ph.D
Founder and CEO
Scalable Informatics Inc.
email: landman@xxxxxxxxxxxxxxxxxxxxxxx
web  : http://scalableinformatics.com
       http://scalableinformatics.com/sicluster
phone: +1 734 786 8423 x121
fax  : +1 866 888 3112
cell : +1 734 612 4615

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux