On Mon, 2016-02-15 at 13:12 +0200, Tanu Kaskinen wrote: > On Mon, 2016-01-18 at 13:06 +0530, arun at accosted.net wrote: > > From: Arun Raghavan <git at arunraghavan.net> > > > > --- > > Â src/modules/echo-cancel/webrtc.cc | 138 +++++++++++++++++++++++++++++++++++++- > > Â 1 file changed, 137 insertions(+), 1 deletion(-) > > > > diff --git a/src/modules/echo-cancel/webrtc.cc b/src/modules/echo-cancel/webrtc.cc > > index 5f00286..9ae5d77 100644 > > --- a/src/modules/echo-cancel/webrtc.cc > > +++ b/src/modules/echo-cancel/webrtc.cc > > @@ -52,6 +52,7 @@ PA_C_DECL_END > > Â #define DEFAULT_INTELLIGIBILITY_ENHANCER false > > Â #define DEFAULT_EXPERIMENTAL_AGC false > > Â #define DEFAULT_AGC_START_VOLUME 85 > > +#define DEFAULT_BEAMFORMING false > > Â #define DEFAULT_TRACE false > > Â > > Â #define WEBRTC_AGC_MAX_VOLUME 255 > > @@ -70,6 +71,9 @@ static const char* const valid_modargs[] = { > > Â Â Â Â Â "intelligibility_enhancer", > > Â Â Â Â Â "experimental_agc", > > Â Â Â Â Â "agc_start_volume", > > +Â Â Â Â "beamforming", > > +Â Â Â Â "mic_geometry", /* documented in parse_mic_geometry() */ > > +Â Â Â Â "target_direction", /* documented in parse_mic_geometry() */ > > Â Â Â Â Â "trace", > > Â Â Â Â Â NULL > > Â }; > > @@ -138,6 +142,76 @@ static void pa_webrtc_ec_fixate_spec(pa_sample_spec *rec_ss, pa_channel_map *rec > > Â Â Â Â Â play_ss->rate = rec_ss->rate; > > Â } > > Â > > +static bool parse_point(const char **point, float (&f)[3]) { > > +Â Â Â Â int ret, length; > > + > > +Â Â Â Â ret = sscanf(*point, "%g,%g,%g%n", &f[0], &f[1], &f[2], &length); > > +Â Â Â Â if (ret != 3) > > +Â Â Â Â Â Â Â Â return false; > > + > > +Â Â Â Â /* Consume the bytes we've read so far */ > > +Â Â Â Â *point += length; > > + > > +Â Â Â Â /* Consume trailing comma if there is one */ > > +Â Â Â Â if (**point == ',') > > +Â Â Â Â Â Â Â Â (*point)++; > > I don't think this should be done in this function. When expecting only > one point, we should fail on "1,2,3," but this code accepts that > string. Okay. > > + > > +Â Â Â Â return true; > > +} > > + > > +static bool parse_mic_geometry(const char **mic_geometry, std::vector& geometry) { > > +Â Â Â Â /* The microphone geometry is expressed as cartesian point form: > > +Â Â Â Â Â *Â Â Â x1,y1,z1,x2,y2,z2,... > > +Â Â Â Â Â * > > +Â Â Â Â Â * Where x1,y1,z1 is the position of the first microphone with regards to > > +Â Â Â Â Â * the array's "center", x2,y2,z2 the position of the second, and so on. > > +Â Â Â Â Â * > > +Â Â Â Â Â * 'x' is the horizontal coordinate, with positive values being to the > > +Â Â Â Â Â * right from the mic array's perspective. > > +Â Â Â Â Â * > > +Â Â Â Â Â * 'y' is the depth coordinate, with positive values being in front of the > > +Â Â Â Â Â * array. > > +Â Â Â Â Â * > > +Â Â Â Â Â * 'z' is the vertical coordinate, with positive values being above the > > +Â Â Â Â Â * array. > > +Â Â Â Â Â * > > +Â Â Â Â Â * All distances are in meters. > > +Â Â Â Â Â */ > > It's a bit unclear what "the mic array's perspective" means. Should I > imagine the array as a person standing upright, looking straight ahead? > Then positive x is to the right from the "person's" point of view, > positive y is in front of the array guy and positive z is above the > array. Yes. > > + > > +Â Â Â Â /* The target direction is expected to be in spherical point form: > > +Â Â Â Â Â *Â Â Â a,e,r > > +Â Â Â Â Â * > > +Â Â Â Â Â * Where 'a is the azimuth of the first mic channel, 'e' its elevation, > > +Â Â Â Â Â * and 'r' the radius. > > +Â Â Â Â Â * > > +Â Â Â Â Â * 0 radians azimuth is to the right of the array, and positive angles > > +Â Â Â Â Â * move in a counter-clockwise direction. > > +Â Â Â Â Â * > > +Â Â Â Â Â * 0 radians elevation is horizontal w.r.t. the array, and positive > > +Â Â Â Â Â * angles go upwards. > > +Â Â Â Â Â * > > +Â Â Â Â Â * radius is distance from the array center in meters. > > +Â Â Â Â Â */ > > + > > +Â Â Â Â int i; > > +Â Â Â Â float f[3]; > > + > > +Â Â Â Â for (i = 0; i < geometry.size(); i++) { > > +Â Â Â Â Â Â Â Â if (!parse_point(mic_geometry, f)) { > > +Â Â Â Â Â Â Â Â Â Â Â Â pa_log("Failed to parse channel %d in mic_geometry", i); > > +Â Â Â Â Â Â Â Â Â Â Â Â return false; > > +Â Â Â Â Â Â Â Â } > > + > > +Â Â Â Â Â Â Â Â pa_log_debug("Got mic #%d position: (%g, %g, %g)", i, f[0], f[1], f[2]); > > + > > +Â Â Â Â Â Â Â Â geometry[i].c[0] = f[0]; > > +Â Â Â Â Â Â Â Â geometry[i].c[1] = f[1]; > > +Â Â Â Â Â Â Â Â geometry[i].c[2] = f[2]; > > +Â Â Â Â } > > + > > +Â Â Â Â return true; > > +} > > + > > Â bool pa_webrtc_ec_init(pa_core *c, pa_echo_canceller *ec, > > Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â pa_sample_spec *rec_ss, pa_channel_map *rec_map, > > Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â pa_sample_spec *play_ss, pa_channel_map *play_map, > > @@ -146,7 +220,7 @@ bool pa_webrtc_ec_init(pa_core *c, pa_echo_canceller *ec, > > Â Â Â Â Â webrtc::AudioProcessing *apm = NULL; > > Â Â Â Â Â webrtc::ProcessingConfig pconfig; > > Â Â Â Â Â webrtc::Config config; > > -Â Â Â Â bool hpf, ns, agc, dgc, mobile, cn, vad, ext_filter, intelligibility, experimental_agc; > > +Â Â Â Â bool hpf, ns, agc, dgc, mobile, cn, vad, ext_filter, intelligibility, experimental_agc, beamforming; > > Â Â Â Â Â int rm = -1; > > Â Â Â Â Â uint32_t agc_start_volume; > > Â Â Â Â Â pa_modargs *ma; > > @@ -255,6 +329,12 @@ bool pa_webrtc_ec_init(pa_core *c, pa_echo_canceller *ec, > > Â Â Â Â Â } > > Â Â Â Â Â ec->params.webrtc.agc_start_volume = agc_start_volume; > > Â > > +Â Â Â Â beamforming = DEFAULT_BEAMFORMING; > > +Â Â Â Â if (pa_modargs_get_value_boolean(ma, "beamforming", &beamforming) < 0) { > > +Â Â Â Â Â Â Â Â pa_log("Failed to parse beamforming value"); > > +Â Â Â Â Â Â Â Â goto fail; > > +Â Â Â Â } > > + > > Â Â Â Â Â if (ext_filter) > > Â Â Â Â Â Â Â Â Â config.Set(new webrtc::ExtendedFilter(true)); > > Â Â Â Â Â if (intelligibility) > > @@ -276,6 +356,62 @@ bool pa_webrtc_ec_init(pa_core *c, pa_echo_canceller *ec, > > Â > > Â Â Â Â Â pa_webrtc_ec_fixate_spec(rec_ss, rec_map, play_ss, play_map, out_ss, out_map); > > Â > > +Â Â Â Â /* We do this after fixate because we need the capture channel count */ > > +Â Â Â Â if (beamforming) { > > +Â Â Â Â Â Â Â Â std::vector geometry(rec_ss->channels); > > +Â Â Â Â Â Â Â Â webrtc::SphericalPointf direction(0.0f, 0.0f, 0.0f); > > +Â Â Â Â Â Â Â Â const char *mic_geometry, *target_direction; > > + > > +Â Â Â Â Â Â Â Â if (!(mic_geometry = pa_modargs_get_value(ma, "mic_geometry", NULL))) { > > +Â Â Â Â Â Â Â Â Â Â Â Â pa_log("mic_geometry must be set if beamforming is enabled"); > > +Â Â Â Â Â Â Â Â Â Â Â Â goto fail; > > +Â Â Â Â Â Â Â Â } > > + > > +Â Â Â Â Â Â Â Â if (!parse_mic_geometry(&mic_geometry, geometry)) { > > +Â Â Â Â Â Â Â Â Â Â Â Â pa_log("Failed to parse mic_geometry value"); > > +Â Â Â Â Â Â Â Â Â Â Â Â goto fail; > > +Â Â Â Â Â Â Â Â } > > + > > +Â Â Â Â Â Â Â Â if (*mic_geometry != '\0') { > > +Â Â Â Â Â Â Â Â Â Â Â Â pa_log("Failed to parse mic_geometry value: more parameters than expected"); > > +Â Â Â Â Â Â Â Â Â Â Â Â goto fail; > > +Â Â Â Â Â Â Â Â } > > This check could be moved to parse_mic_geometry(). Fair enough. > > + > > +Â Â Â Â Â Â Â Â if ((target_direction = pa_modargs_get_value(ma, "target_direction", NULL))) { > > +Â Â Â Â Â Â Â Â Â Â Â Â float f[3]; > > + > > +Â Â Â Â Â Â Â Â Â Â Â Â if (!parse_point(&target_direction, f)) { > > +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â pa_log("Failed to parse target_direction value"); > > +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â goto fail; > > +Â Â Â Â Â Â Â Â Â Â Â Â } > > + > > +Â Â Â Â Â Â Â Â Â Â Â Â if (*target_direction != '\0') { > > +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â pa_log("Failed to parse target_direction value: more parameters than expected"); > > +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â goto fail; > > +Â Â Â Â Â Â Â Â Â Â Â Â } > > + > > +#define IS_ZERO(f) ((f) < 0.000001 && (f) > -0.000001) > > This kind of macro would be nice to have in some shared location. > speex.c and a couple of tests have similar code, and I remember needing > it in the volume control patches. I can do that in a separate commit. > > + > > + Â Â Â Â if (!IS_ZERO(f[1]) || !IS_ZERO(f[2])) { > > This line contains a tab character. Fixed. > > +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â pa_log("The beamformer currently only supports targeting along the azimuth"); > > +Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â goto fail; > > +Â Â Â Â Â Â Â Â Â Â Â Â } > > + > > +Â Â Â Â Â Â Â Â Â Â Â Â direction.s[0] = f[0]; > > +Â Â Â Â Â Â Â Â Â Â Â Â direction.s[1] = f[1]; > > +Â Â Â Â Â Â Â Â Â Â Â Â direction.s[2] = f[2]; > > +Â Â Â Â Â Â Â Â } > > + > > +Â Â Â Â Â Â Â Â /* The beamformer gives us a single channel */ > > +Â Â Â Â Â Â Â Â out_ss->channels = 1; > > +Â Â Â Â Â Â Â Â pa_channel_map_init_mono(out_map); > > Could this be moved to fixate_spec()? I don't like the audio format > parameters being calculated all over the place. I wanted to do that, but we can't because we need the capture channel count for parsing the mic geometry (there is a comment explaining that where if (beamforming) { ... } is). -- Arun