Re: Possible memory leak in gluster samba vfs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Avati,

Your suggestion to remove the entries for page sizes less than 4KB (or even 4KB itself) does not seem to help; the RES of smbd still keeps increasing.


Thanks
-Kane

在 2013-9-25,上午10:12,Anand Avati <avati@xxxxxxxxxxx> 写道:

On Tue, Sep 24, 2013 at 6:37 PM, haiwei.xie-soulinfo <haiwei.xie@xxxxxxxxxxxx> wrote:
hi,

   Our patch for this bug seems to run well: smbd no longer exits due to the OOM killer. But it is not the correct method.

git version: release-3.4/886021a31bdac83c2124d08d64b81f22d82039d6

diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
index 66e7d69..535ee53 100644
--- a/api/src/glfs-fops.c
+++ b/api/src/glfs-fops.c
@@ -713,7 +713,9 @@ glfs_pwritev (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
        }

        size = iov_length (iovec, iovcnt);
-
+#define MIN_LEN 8 * 1024
+       if (size < MIN_LEN)
+               size = MIN_LEN;
        iobuf = iobuf_get2 (subvol->ctx->iobuf_pool, size);
        if (!iobuf) {
                ret = -1;



Ah, looks like we need to tune the page_size/num_pages table in libglusterfs/src/iobuf.c. The table is allowing for too small pages. We should probably remove entries for page size less than 4KB. Just doing that might fix your issue:

diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c
index a89e962..0269004 100644
--- a/libglusterfs/src/iobuf.c
+++ b/libglusterfs/src/iobuf.c
@@ -24,9 +24,7 @@
 /* Make sure this array is sorted based on pagesize */
 struct iobuf_init_config gf_iobuf_init_config[] = {
         /* { pagesize, num_pages }, */
-        {128, 1024},
-        {512, 512},
-        {2 * 1024, 512},
+        {4 * 1024, 256},
         {8 * 1024, 128},
         {32 * 1024, 64},
         {128 * 1024, 32},

Avati

 
> On 09/13/2013 06:03 PM, kane wrote:
> > Hi
> >
> > We use the Samba Gluster VFS in an IO test, but smbd on the gluster server
> > gets killed by the OOM killer.
> > The smbd process uses over 15g RES as shown by the top command; our simple
> > test code is at the end:
> >
> > gluster server vfs --> smbd --> client mount dir "/mnt/vfs"--> execute
> > vfs test program "$ ./vfs 1000"
> >
> > then we can watch gluster server smbd RES with top command.
> >
> > PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
> > 4000 soul 20 0 5486m 4.9g 10m R 108.4 31.5 111:07.07 smbd
> > 3447 root 20 0 1408m 44m 2428 S 44.4 0.3 59:11.55 glusterfsd
> >
> > io test code:
> > =======================================
> > #define _LARGEFILE64_SOURCE
> >
> > #include <stdio.h>
> > #include <unistd.h>
> > #include <string.h>
> > #include <pthread.h>
> > #include <stdlib.h>
> > #include <fcntl.h>
> > #include <sys/types.h>
> >
> > int WT = 1;
> >
> > #define RND(x) ((x>0)?(genrand() % (x)):0)
> > extern unsigned long genrand();
> > extern void sgenrand();
> >
> > /* Period parameters */
> > #define N 624
> > #define M 397
> > #define MATRIX_A 0x9908b0df /* constant vector a */
> > #define UPPER_MASK 0x80000000 /* most significant w-r bits */
> > #define LOWER_MASK 0x7fffffff /* least significant r bits */
> >
> > /* Tempering parameters */
> > #define TEMPERING_MASK_B 0x9d2c5680
> > #define TEMPERING_MASK_C 0xefc60000
> > #define TEMPERING_SHIFT_U(y) (y >> 11)
> > #define TEMPERING_SHIFT_S(y) (y << 7)
> > #define TEMPERING_SHIFT_T(y) (y << 15)
> > #define TEMPERING_SHIFT_L(y) (y >> 18)
> >
> > static unsigned long mt[N]; /* the array for the state vector */
> > static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */
> >
> > /* Initializing the array with a seed */
> > void
> > sgenrand(seed)
> > unsigned long seed;
> > {
> > int i;
> >
> > for (i=0;i<N;i++) {
> > mt[i] = seed & 0xffff0000;
> > seed = 69069 * seed + 1;
> > mt[i] |= (seed & 0xffff0000) >> 16;
> > seed = 69069 * seed + 1;
> > }
> > mti = N;
> > }
> >
> > unsigned long
> > genrand()
> > {
> > unsigned long y;
> > static unsigned long mag01[2]={0x0, MATRIX_A};
> > /* mag01[x] = x * MATRIX_A for x=0,1 */
> >
> > if (mti >= N) { /* generate N words at one time */
> > int kk;
> >
> > if (mti == N+1) /* if sgenrand() has not been called, */
> > sgenrand(4357); /* a default initial seed is used */
> >
> > for (kk=0;kk<N-M;kk++) {
> > y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
> > mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1];
> > }
> > for (;kk<N-1;kk++) {
> > y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
> > mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1];
> > }
> > y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
> > mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1];
> >
> > mti = 0;
> > }
> > y = mt[mti++];
> > y ^= TEMPERING_SHIFT_U(y);
> > y ^= TEMPERING_SHIFT_S(y) & TEMPERING_MASK_B;
> > y ^= TEMPERING_SHIFT_T(y) & TEMPERING_MASK_C;
> > y ^= TEMPERING_SHIFT_L(y);
> >
> > return y;
> > }
> >
> > char *initialize_file_source(int size)
> > {
> > char *new_source;
> > int i;
> >
> > if ((new_source=(char *)malloc(size))==NULL) /* allocate buffer */
> > fprintf(stderr,"Error: failed to allocate source file of size %d\n",size);
> > else
> > for (i=0; i<size; i++) /* file buffer with junk */
> > new_source[i]=32+RND(95);
> >
> > return(new_source);
> > }
> >
> > void *tran_file(void *map)
> > {
> > int block_size = 512;
> > char *read_buffer; /* temporary space for reading file data into */
> > int fd = open((char *)map, O_RDWR | O_CREAT | O_TRUNC, 0644);
> > if(fd == -1) {
> > perror("open");
> > return ;
> > }
> >
> > //read_buffer=(char *)malloc(block_size);
> > //memset(read_buffer, 0x56, block_size);
> > read_buffer = initialize_file_source(block_size);
> > while(WT){
> > sleep(1);
> > pwrite(fd, read_buffer, block_size, 0);
> > pread(fd, read_buffer, block_size, 0);
> > }
> > free(read_buffer);
> > close(fd);
> > }
> >
> > int main(int argc, char *argv[])
> > {
> > if(argc == 1) {
> > printf("Set file name and file size in GB!\n");
> > return -1;
> > }
> >
> > unsigned long long length = atoll(argv[1]) * 1024 * 1024 * 1024;
> > unsigned long long index = 0;
> > unsigned long long count = 0;
> > int fd = 0;
> > char path[32];
> > char *swap = NULL;
> > int psize = 65536;
> > int ret = 0;
> > int ret_i = 0;
> > void *tret;
> > pthread_t rtid_1;
> > pthread_t rtid_2;
> > pthread_t rtid_3;
> > pthread_t rtid_4;
> > int fd_i = 0;
> > int a_size = 256;
> > char *append_buffer; /* temporary space for reading file data into */
> >
> > swap = (char *)malloc(sizeof(char) * psize);
> > if(swap == NULL) {
> > printf("Malloc Swap Failed!\n");
> > close(fd);
> > return -1;
> > }
> > memset(swap, 0x55, psize);
> >
> > append_buffer = initialize_file_source(a_size);
> >
> > pthread_create(&rtid_1, NULL, tran_file, "/mnt/vfs/a.map");
> > pthread_create(&rtid_2, NULL, tran_file, "/mnt/vfs/b.map");
> > pthread_create(&rtid_3, NULL, tran_file, "/mnt/vfs/c.map");
> > pthread_create(&rtid_4, NULL, tran_file, "/mnt/vfs/d.map");
> >
> > while(index < length)
> > {
> > if((index % (1024*1024*1024)) == 0) {
> > count++;
> > sprintf(path, "/mnt/vfs/data/%08lld.dat", count);
> > if((fd = open(path, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR)) < 0)
> > {
> > printf("Can not open io file: %s\n", path);
> > perror("open");
> > return -1;
> > }
> > sprintf(path, "/mnt/vfs/data/%08lld.index", count);
> > if((fd_i = open(path, O_RDWR | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR))
> > < 0)
> > {
> > printf("Can not open io file: %s\n", path);
> > perror("open");
> > return -1;
> > }
> > printf("...1GiB\n");
> > }
> >
> > ret = pwrite(fd, swap, psize, (index % (1024*1024*1024)));
> > if(ret != psize) {
> > perror("write");
> > printf("Write data file failed!\n");
> > continue;
> > }
> > lseek64(fd_i, ((index*a_size)/65536), SEEK_SET);
> > ret_i = write(fd_i, append_buffer, a_size);
> > if(ret_i != a_size) {
> > perror("write");
> > printf("Write index file failed!\n");
> > continue;
> > }
> >
> > index += ret;
> > if((index % (1024*1024*1024)) == 0) {
> > close(fd);
> > close(fd_i);
> > }
> > }
> >
> > WT = 0;
> > pthread_join(rtid_1, &tret);
> > pthread_join(rtid_2, &tret);
> > pthread_join(rtid_3, &tret);
> > pthread_join(rtid_4, &tret);
> > printf("%s GB write\n", argv[1]);
> > sync();
> > free(swap);
> > free(append_buffer);
> >
> > return 0;
> > }
> >
> > =======================================
> >
> >
> >
> >
> > kane
> > ----------------------------------------------------------------
> > Email: kai.zhou@xxxxxxxxxxxx <mailto:kai.zhou@xxxxxxxxxxxx>
> > 电话: 0510-85385788-616
> >
> >
> Thanks for the code. I will try to reproduce the issue using your code.
>
> -Lala


--
谢海威
软件项目经理
电话:  +86 10-68920588
手机:  +86 13911703586
Email:  haiwei.xie@xxxxxxxxxxxx

_______________________________________________
Gluster-devel mailing list
Gluster-devel@xxxxxxxxxx
https://lists.nongnu.org/mailman/listinfo/gluster-devel



[Index of Archives]     [Gluster Users]     [Ceph Users]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Security]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux