On Wed, Oct 13, 2021 at 9:51 AM Daniel Latypov <dlatypov@xxxxxxxxxx> wrote: > > On Fri, Oct 8, 2021 at 4:51 PM Daniel Latypov <dlatypov@xxxxxxxxxx> wrote: > > > > On Fri, Oct 8, 2021 at 2:08 PM Daniel Latypov <dlatypov@xxxxxxxxxx> wrote: > > > > > > kunit.py currently crashes and fails to parse kernel output if it's not > > > fully valid utf-8. > > > > > > This can come from memory corruption or just inadvertently printing > > > out binary data as strings. > > > > > > E.g. adding this line into a kunit test > > > pr_info("\x80") > > > will cause this exception > > > UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 1961: invalid start byte > > > > > > We can tell Python how to handle errors, see > > > https://docs.python.org/3/library/codecs.html#error-handlers > > > > > > Unfortunately, it doesn't seem like there's a way to specify this in > > > just one location, so we need to repeat ourselves quite a bit. > > > > > > Specify `errors='backslashreplace'` so we instead: > > > * print out the offending byte as '\x80' > > > * try and continue parsing the output. > > > * as long as the TAP lines themselves are valid, we're fine. > > > > > > Signed-off-by: Daniel Latypov <dlatypov@xxxxxxxxxx> > > > --- > > > tools/testing/kunit/kunit.py | 3 ++- > > > tools/testing/kunit/kunit_kernel.py | 4 ++-- > > > 2 files changed, 4 insertions(+), 3 deletions(-) > > > > > > diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py > > > index 9c9ed4071e9e..28ae096d4b53 100755 > > > --- a/tools/testing/kunit/kunit.py > > > +++ b/tools/testing/kunit/kunit.py > > > @@ -457,9 +457,10 @@ def main(argv, linux=None): > > > sys.exit(1) > > > elif cli_args.subcommand == 'parse': > > > if cli_args.file == None: > > > + sys.stdin.reconfigure(errors='backslashreplace') > > > > Ugh, pytype doesn't like this even though it's valid. 
> > I can squash the error with > > sys.stdin.reconfigure(errors='backslashreplace') # pytype: > > disable=attribute-error > > > > I had wanted us to avoid having anything specific to pytype in the code. > > But mypy (the more common typechecker iirc) hasn't been smart enough > > to typecheck our code since the QEMU support landed. > > > > If we don't add this directive, both typecheckers will report at least > > one spurious warning. > > Should I go ahead and add it, Brendan/David? > > Friendly ping. > Should we go ahead and add "# pytype: disable=attribute-error" here? I've sent out a v2 with this: https://lore.kernel.org/linux-kselftest/20211020232121.1748376-1-dlatypov@xxxxxxxxxx > > > > > > kunit_output = sys.stdin > > > else: > > > - with open(cli_args.file, 'r') as f: > > > + with open(cli_args.file, 'r', errors='backslashreplace') as f: > > > kunit_output = f.read().splitlines() > > > request = KunitParseRequest(cli_args.raw_output, > > > None, > > > diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py > > > index faa6320e900e..f08c6c36a947 100644 > > > --- a/tools/testing/kunit/kunit_kernel.py > > > +++ b/tools/testing/kunit/kunit_kernel.py > > > @@ -135,7 +135,7 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): > > > stdin=subprocess.PIPE, > > > stdout=subprocess.PIPE, > > > stderr=subprocess.STDOUT, > > > - text=True, shell=True) > > > + text=True, shell=True, errors='backslashreplace') > > > > > > class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): > > > """An abstraction over command line operations performed on a source tree.""" > > > @@ -172,7 +172,7 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): > > > stdin=subprocess.PIPE, > > > stdout=subprocess.PIPE, > > > stderr=subprocess.STDOUT, > > > - text=True) > > > + text=True, errors='backslashreplace') > > > > > > def get_kconfig_path(build_dir) -> str: > > > return get_file_path(build_dir, KCONFIG_PATH) > > > > > > 
base-commit: a032094fc1ed17070df01de4a7883da7bb8d5741 > > > -- > > > 2.33.0.882.g93a45727a2-goog > > >