Hi,

I'm trying to optimize dmix because I'm working with a large number of channels (up to 16), and in this case dmix has a non-negligible impact on performance. I'm working with ALSA 1.1.9.

I took a first look at the generic_mix_areas_16_native function (https://github.com/alsa-project/alsa-lib/blob/v1.1.9/src/pcm/pcm_dmix_generic.c#L130). I'd like to ask whether I can avoid checking, on each loop iteration, whether the current dst sample is zero:

for (;;) {
        sample = *src;
        if (! *dst) {
                *sum = sample;
                *dst = *src;
        } else {
                sample += *sum;
                *sum = sample;
                if (sample > 0x7fff)
                        sample = 0x7fff;
                else if (sample < -0x8000)
                        sample = -0x8000;
                *dst = sample;
        }
        if (!--size)
                return;
        src = (signed short *) ((char *)src + src_step);
        dst = (signed short *) ((char *)dst + dst_step);
        sum = (signed int *) ((char *)sum + sum_step);
}

Would it be possible to check only the first sample of the period, as in the code below? My assumption is that if dst[0] is 0, then dst[1] ... dst[period-1] will also be 0, so I don't need to check every time. This would already be an optimization in itself, but it could also be a starting point for further optimizations based on my HW. But first of all, I'd like to ask whether my assumption is correct.

if (! *dst) {
        for (;;) {
                sample = *src;
                *sum = sample;
                *dst = *src;
                if (!--size)
                        return;
                src = (signed short *) ((char *)src + src_step);
                dst = (signed short *) ((char *)dst + dst_step);
                sum = (signed int *) ((char *)sum + sum_step);
        }
} else {
        for (;;) {
                sample = *src;
                sample += *sum;
                *sum = sample;
                if (sample > 0x7fff)
                        sample = 0x7fff;
                else if (sample < -0x8000)
                        sample = -0x8000;
                *dst = sample;
                if (!--size)
                        return;
                src = (signed short *) ((char *)src + src_step);
                dst = (signed short *) ((char *)dst + dst_step);
                sum = (signed int *) ((char *)sum + sum_step);
        }
}

Thank you!

Best Regards,
Giuliano
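
P.S. In case it helps the discussion, below is a small throwaway harness I put together (my own code, not from alsa-lib; the names mix_orig and mix_hoisted are mine, and the steps are simplified to unit strides). It runs the current loop and the hoisted variant side by side and checks that they produce identical dst/sum output in the two cases my assumption covers: dst all zero at entry, and dst all nonzero at entry. It is single-process only, so it deliberately does NOT model concurrent dmix clients touching dst, which is exactly what my question is about.

/* mix_compare.c -- throwaway comparison harness, not alsa-lib code. */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* The logic of generic_mix_areas_16_native, simplified to unit steps. */
static void mix_orig(unsigned int size, signed short *dst,
                     const signed short *src, signed int *sum)
{
        signed int sample;

        for (;;) {
                sample = *src;
                if (!*dst) {
                        *sum = sample;
                        *dst = *src;
                } else {
                        sample += *sum;
                        *sum = sample;
                        if (sample > 0x7fff)
                                sample = 0x7fff;
                        else if (sample < -0x8000)
                                sample = -0x8000;
                        *dst = sample;
                }
                if (!--size)
                        return;
                src++; dst++; sum++;
        }
}

/* The proposed variant: test *dst once, then run an unconditional body. */
static void mix_hoisted(unsigned int size, signed short *dst,
                        const signed short *src, signed int *sum)
{
        signed int sample;

        if (!*dst) {
                for (;;) {
                        sample = *src;
                        *sum = sample;
                        *dst = *src;
                        if (!--size)
                                return;
                        src++; dst++; sum++;
                }
        } else {
                for (;;) {
                        sample = *src + *sum;
                        *sum = sample;
                        if (sample > 0x7fff)
                                sample = 0x7fff;
                        else if (sample < -0x8000)
                                sample = -0x8000;
                        *dst = sample;
                        if (!--size)
                                return;
                        src++; dst++; sum++;
                }
        }
}

static const char *check(const signed short *d1, const signed short *d2,
                         const signed int *s1, const signed int *s2, size_t n)
{
        return (!memcmp(d1, d2, n * sizeof(*d1)) &&
                !memcmp(s1, s2, n * sizeof(*s1))) ? "match" : "MISMATCH";
}

int main(void)
{
        enum { N = 4096 };
        /* static => zero-initialized, which Case 1 relies on */
        static signed short src[N], dst1[N], dst2[N];
        static signed int sum1[N], sum2[N];
        unsigned int i;

        srand(1);
        for (i = 0; i < N; i++)
                src[i] = (signed short)(rand() % 65536 - 32768);

        /* Case 1: dst all zero at entry ("first writer" of the period). */
        mix_orig(N, dst1, src, sum1);
        mix_hoisted(N, dst2, src, sum2);
        printf("all-zero dst:    %s\n", check(dst1, dst2, sum1, sum2, N));

        /* Case 2: dst all nonzero at entry (a previous writer mixed already). */
        for (i = 0; i < N; i++) {
                dst1[i] = dst2[i] = (signed short)(rand() % 0x7fff + 1);
                sum1[i] = sum2[i] = dst1[i];
        }
        mix_orig(N, dst1, src, sum1);
        mix_hoisted(N, dst2, src, sum2);
        printf("all-nonzero dst: %s\n", check(dst1, dst2, sum1, sum2, N));
        return 0;
}

Both cases print "match" here, so the two variants agree whenever the period is uniformly zero or uniformly nonzero at entry; whether that precondition can ever be violated under real dmix concurrency is the part I can't verify on my own.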