On Fri, May 29, 2020 at 12:10:03PM +0200, Paulo de Rezende Pinatti wrote: > Introduce two utility functions to parse a kernel command > line string according to the kernel code parsing rules in > order to enable the caller to perform operations such as > verifying whether certain argument=value combinations are > present or retrieving an argument's value. > > Signed-off-by: Paulo de Rezende Pinatti <ppinatti@xxxxxxxxxxxxx> > Signed-off-by: Boris Fiuczynski <fiuczy@xxxxxxxxxxxxx> > --- > src/libvirt_private.syms | 2 + > src/util/virutil.c | 169 +++++++++++++++++++++++++++++++++++++++ > src/util/virutil.h | 17 ++++ > tests/utiltest.c | 141 ++++++++++++++++++++++++++++++++ > 4 files changed, 329 insertions(+) > > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms > index a6af44fe1c..a206a943c5 100644 > --- a/src/libvirt_private.syms > +++ b/src/libvirt_private.syms > @@ -3433,6 +3433,8 @@ virHostGetDRMRenderNode; > virHostHasIOMMU; > virIndexToDiskName; > virIsDevMapperDevice; > +virKernelCmdlineMatchParam; > +virKernelCmdlineNextParam; > virMemoryLimitIsSet; > virMemoryLimitTruncate; > virMemoryMaxValue; > diff --git a/src/util/virutil.c b/src/util/virutil.c > index fb46501142..749c9d7116 100644 > --- a/src/util/virutil.c > +++ b/src/util/virutil.c > @@ -1725,6 +1725,175 @@ virHostGetDRMRenderNode(void) > return ret; > } > > + > +static const char *virKernelCmdlineSkipDbQuote(const char *cmdline, minor nitpick: we can call ^this simply ...SkipQuote, we don't need to be so specific about it being a double quotation mark, do we? > + bool *is_quoted) > +{ > + if (cmdline[0] == '"') { > + *is_quoted = !(*is_quoted); > + cmdline++; > + } > + return cmdline; > +} > + > + > +static size_t virKernelCmdlineSearchForward(const char *cmdline, > + bool *is_quoted, > + bool include_equal) Hmm, what if instead we tried to find and return the index of the '=' character but iterated all the way until the next applicable (i.e. taking quotation into account) space and saved that end of arg/arg=val parameter into **res. The caller of this function would then continue directly from *res with the next arg/arg=val parameter. We could then call this something like virKernelCmdlineFindEqual and return -1 if there is no '=' sign, indicating that it's a standalone parameter with no value. > +{ > + size_t index; > + > + for (index = 0; cmdline[index]; index++) { > + if ((!(*is_quoted) && g_ascii_isspace(cmdline[index])) || > + (include_equal && cmdline[index] == '=')) > + break; > + virKernelCmdlineSkipDbQuote(cmdline + index, is_quoted); > + } > + return index; > +} > + > + > +static size_t virKernelCmdlineNextSpace(const char *cmdline, > + bool *is_quoted) > +{ > + return virKernelCmdlineSearchForward(cmdline, is_quoted, false); > +} > + > + > +static size_t virKernelCmdlineNextSpaceOrEqual(const char *cmdline, > + bool *is_quoted) > +{ > + return virKernelCmdlineSearchForward(cmdline, is_quoted, true); > +} If we implement what I suggested above for virKernelCmdlineSearchForward, we won't need 2 wrappers for the same thing differentiating only in whether we do or do not need to search for the '=' sign as well. > + > + > +static char* virKernelArgNormalize(const char *arg) > +{ > + return virStringReplace(arg, "_", "-"); > +} > + > + > +static char* virKernelCmdlineArgNormalize(const char *cmdline, size_t offset) > +{ > + g_autofree char *param = g_strndup((cmdline), offset); > + > + return virKernelArgNormalize(param); See below for comments why we don't need this wrapper. > +} > + > + > +/* > + * Parse the kernel cmdline and store the next parameter in @param > + * and the value of @param in @val which can be NULL if @param has > + * no value. In addition returns the address right after @param=@value > + * for possible further processing. > + * > + * @cmdline: kernel command line string to be checked for next parameter > + * @param: pointer to hold retrieved parameter, will be NULL if none found > + * @val: pointer to hold retrieved value of @param > + * > + * Returns a pointer to address right after @param=@val in the > + * kernel command line, will point to the string's end (NULL) > + * in case no next parameter is found > + */ > +const char *virKernelCmdlineNextParam(const char *cmdline, > + char **param, > + char **val) > +{ > + size_t offset; > + bool is_quoted = false; > + *param = NULL; > + *val = NULL; > + > + virSkipSpaces(&cmdline); > + cmdline = virKernelCmdlineSkipDbQuote(cmdline, &is_quoted); > + offset = virKernelCmdlineNextSpaceOrEqual(cmdline, &is_quoted); if you get the index of the equal sign, but iterate all the way until the end of the arg/arg=val parameter, you can use index and do: *param = g_strndup(cmdline, equal_index); > + if (offset == 0) If we used something like char **res (taking it as a parameter to this function) to represent the rest of the unparsed cmdline, then it should be NULL in this case, so the check could remain with a slight adjustment (I haven't implemented it, but I do hope it should work :)) > + return cmdline; > + > + *param = virKernelCmdlineArgNormalize(cmdline, offset); ^This normalization should be done in virKernelCmdlineMatchParam instead. That way, you won't need a wrapper over virKernelArgNormalize. > + cmdline = cmdline + offset; > + /* param has no value */ > + if (*cmdline != '=') > + return cmdline; ^This check could then be dropped along with moving the cursor in the cmdline string. > + > + cmdline = virKernelCmdlineSkipDbQuote(++cmdline, &is_quoted); > + offset = virKernelCmdlineNextSpace(cmdline, &is_quoted); If we do the above, we should be able to ditch ^this second loop searching for the next space. > + if (cmdline[offset-1] == '"') > + *val = g_strndup(cmdline, offset-1); > + else > + *val = g_strndup(cmdline, offset); ^Here you'd have to do arithmetics using the *res variable instead. > + > + return cmdline + offset; We'd simply return *res; > +} > + > + > +#define VIR_CMDLINE_STR_CMP(kernel_val, caller_val, flags) \ > + (((flags & VIR_KERNEL_CMDLINE_FLAGS_CMP_EQ) && \ > + STREQ(kernel_val, caller_val)) || ((flags & VIR_KERNEL_CMDLINE_FLAGS_CMP_PREFIX) && \ > + STRPREFIX(kernel_val, caller_val))) > + > + > +/* > + * Try to match the provided kernel cmdline string with the provided @arg > + * and the list @values of possible values according to the matching strategy > + * defined in @flags. Possible options include: > + * - VIR_KERNEL_CMDLINE_FLAGS_CMP_PREFIX: do a substring comparison of values I know you used it in the following patches to match against the accepted values, but do we really need to match with a prefix, I'd be fine with simple stirng equality matching in all cases and not mix the matching strategy for both values and kernel arguments. > + * (uses size of value provided as input) > + * - VIR_KERNEL_CMDLINE_FLAGS_CMP_EQ: do a strict string comparison of values ^This should be default used in all cases IMO unless the prefix matching provides us with a considerable performance gain. > + * - VIR_KERNEL_CMDLINE_FLAGS_SEARCH_STICKY: first positive match satifies search is "sticky searching" some terminus technicus? If not, we probably should name this FLAGS_MATCH_FIRST and FLAGS_MATCH_LAST respectively. > + * (in case of multiple argument occurrences) > + * - VIR_KERNEL_CMDLINE_FLAGS_SEARCH_LAST: use the result of last argument occurence > + * (in case of multiple argument occurrences) > + * > + * @cmdline: kernel command line string to be checked for @arg > + * @arg: kernel command line argument > + * @values: array of possible values to match @arg > + * @len_values: size of array, it can be 0 meaning a match will be positive if the > + * argument has no value. > + * @flags: flag mask defining the strategy for matching and comparing > + * > + * Returns true if a match is found, false otherwise > + */ > +bool virKernelCmdlineMatchParam(const char *cmdline, > + const char *arg, > + const char **values, > + size_t len_values, > + virKernelCmdlineFlags flags) > +{ > + bool match = false; > + size_t i; > + const char *next = cmdline; > + g_autofree char *norm_arg = virKernelArgNormalize(arg); > + g_autofree char *kparam = NULL; > + g_autofree char *kval = NULL; ^These last two variables can be moved into the while loop, you won't need the explicit VIR_FREEs then. > + > + while (next[0] != '\0') { > + VIR_FREE(kparam); > + VIR_FREE(kval); > + next = virKernelCmdlineNextParam(next, &kparam, &kval); Insert a blank line in between all these "ifs" for better readability (I know the coding guideline we have doesn't mention it). > + if (!kparam) > + break; You'd do the normalization of the parsed arg value here. > + if (STRNEQ(kparam, norm_arg)) > + continue; > + if (!kval) { > + match = (len_values == 0) ? true : false; > + } else { > + match = false; > + for (i = 0; i < len_values; i++) { > + if (VIR_CMDLINE_STR_CMP(kval, values[i], flags)) { > + match = true; > + break; > + } > + } > + } > + if (match && (flags & VIR_KERNEL_CMDLINE_FLAGS_SEARCH_STICKY)) > + break; > + } > + > + return match; > +} > + > + > /* > * Get a password from the console input stream. > * The caller must free the returned password. > diff --git a/src/util/virutil.h b/src/util/virutil.h > index 49b4bf440f..7499b78153 100644 > --- a/src/util/virutil.h > +++ b/src/util/virutil.h > @@ -147,6 +147,23 @@ bool virHostHasIOMMU(void); > > char *virHostGetDRMRenderNode(void) G_GNUC_NO_INLINE; > > +typedef enum { > + VIR_KERNEL_CMDLINE_FLAGS_CMP_PREFIX = 1, > + VIR_KERNEL_CMDLINE_FLAGS_CMP_EQ = 2, > + VIR_KERNEL_CMDLINE_FLAGS_SEARCH_STICKY = 4, > + VIR_KERNEL_CMDLINE_FLAGS_SEARCH_LAST = 8, > +} virKernelCmdlineFlags; > + > +const char *virKernelCmdlineNextParam(const char *cmdline, > + char **param, > + char **val); > + > +bool virKernelCmdlineMatchParam(const char *cmdline, > + const char *arg, > + const char **values, > + size_t len_values, > + virKernelCmdlineFlags flags); > + > /** > * VIR_ASSIGN_IS_OVERFLOW: > * @rvalue: value that is checked (evaluated twice) > diff --git a/tests/utiltest.c b/tests/utiltest.c > index 5ae04585cb..01fb8c89f5 100644 > --- a/tests/utiltest.c > +++ b/tests/utiltest.c > @@ -254,6 +254,145 @@ testOverflowCheckMacro(const void *data G_GNUC_UNUSED) > } > > > +struct testKernelCmdlineNextParamData > +{ > + const char *cmdline; > + const char *param; > + const char *val; > + const char *next; > +}; > + > +static struct testKernelCmdlineNextParamData kEntries[] = { > + { "arg1 arg2 arg3=val1", "arg1", NULL, " arg2 arg3=val1" }, > + { "arg1=val1 arg2 arg3=val3 arg4", "arg1", "val1", " arg2 arg3=val3 arg4" }, > + { "arg3=val3 ", "arg3", "val3", " " }, > + { "arg3=val3", "arg3", "val3", "" }, > + { "arg-3=val3 arg4", "arg-3", "val3", " arg4" }, > + { "arg_3=val3 arg4", "arg-3", "val3", " arg4" }, > + { " arg_3=val3 arg4", "arg-3", "val3", " arg4" }, > + { " arg-3=val3 arg4", "arg-3", "val3", " arg4" }, > + { "arg2=\"value with spaces\" arg3=val3", "arg2", "value with spaces", " arg3=val3" }, > + { " arg2=\"value with spaces\" arg3=val3", "arg2", "value with spaces", " arg3=val3" }, > + { " \"arg2=value with spaces\" arg3=val3", "arg2", "value with spaces", " arg3=val3" }, > + { "arg2=\"val\"ue arg3", "arg2", "val\"ue", " arg3" }, > + { " arg3\" escaped=val2\"", "arg3\" escaped", "val2", "" }, ^Is this even valid for the kernel itself? Looking at [1], they clearly don't allow escaped \" in the arg/value. [1] https://github.com/torvalds/linux/blob/db54615e21419c3cb4d699a0b0aa16cc44d0e9da/lib/cmdline.c > + { " arg2longer=someval arg2=val2 arg3 ", "arg2longer", "someval", " arg2=val2 arg3 " }, > + { "=val1 arg2=val2", NULL, NULL, "=val1 arg2=val2" }, > + { " ", NULL, NULL, "" }, > + { "", NULL, NULL, "" }, > +}; > + > +static int > +testKernelCmdlineNextParam(const void *data G_GNUC_UNUSED) > +{ > + char *param = NULL; > + char *val = NULL; > + const char *next; > + size_t i; > + > + for (i = 0; i < G_N_ELEMENTS(kEntries); ++i) { > + VIR_FREE(param); > + VIR_FREE(val); > + > + next = virKernelCmdlineNextParam(kEntries[i].cmdline, ¶m, &val); > + > + if (STRNEQ_NULLABLE(param, kEntries[i].param) || > + STRNEQ_NULLABLE(val, kEntries[i].val) || > + STRNEQ(next, kEntries[i].next)) { > + VIR_TEST_DEBUG("\nKernel cmdline [%s]", kEntries[i].cmdline); > + VIR_TEST_DEBUG("Expect param [%s]", kEntries[i].param); > + VIR_TEST_DEBUG("Actual param [%s]", param); > + VIR_TEST_DEBUG("Expect value [%s]", kEntries[i].val); > + VIR_TEST_DEBUG("Actual value [%s]", val); > + VIR_TEST_DEBUG("Expect next [%s]", kEntries[i].next); > + VIR_TEST_DEBUG("Actual next [%s]", next); > + > + VIR_FREE(param); > + VIR_FREE(val); > + > + return -1; > + } > + } > + > + VIR_FREE(param); > + VIR_FREE(val); > + > + return 0; > +} I appreciate the thorough unit testing :) Erik