And these patches are making the headlines somewhere :) https://www.phoronix.com/news/Linux-6.9-Speakup-Driver Cheers, Didier Le 23/03/2024 à 17:42, Samuel Thibault a écrit : > i18n currently assumes latin1 encoding, which is not enough for most > languages. > > This separates out the utf-8 processing of /dev/synthu, and uses it for > a new synth_writeu, which we make synth_printf now use. This has the > effect of making all the i18n messages processed in utf-8. > > Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxx> > --- > drivers/accessibility/speakup/devsynth.c | 59 ++++----------- > drivers/accessibility/speakup/speakup.h | 2 + > drivers/accessibility/speakup/synth.c | 92 ++++++++++++++++++++++-- > 3 files changed, 102 insertions(+), 51 deletions(-) > > diff --git a/drivers/accessibility/speakup/devsynth.c b/drivers/accessibility/speakup/devsynth.c > index 674204ee5a85..e3d909bd0480 100644 > --- a/drivers/accessibility/speakup/devsynth.c > +++ b/drivers/accessibility/speakup/devsynth.c > @@ -39,13 +39,13 @@ static ssize_t speakup_file_write(struct file *fp, const char __user *buffer, > static ssize_t speakup_file_writeu(struct file *fp, const char __user *buffer, > size_t nbytes, loff_t *ppos) > { > - size_t count = nbytes, want; > + size_t count = nbytes, consumed, want; > const char __user *ptr = buffer; > size_t bytes; > unsigned long flags; > unsigned char buf[256]; > u16 ubuf[256]; > - size_t in, in2, out; > + size_t in, out; > > if (!synth) > return -ENODEV; > @@ -58,57 +58,24 @@ static ssize_t speakup_file_writeu(struct file *fp, const char __user *buffer, > return -EFAULT; > > /* Convert to u16 */ > - for (in = 0, out = 0; in < bytes; in++) { > - unsigned char c = buf[in]; > - int nbytes = 8 - fls(c ^ 0xff); > - u32 value; > - > - switch (nbytes) { > - case 8: /* 0xff */ > - case 7: /* 0xfe */ > - case 1: /* 0x80 */ > - /* Invalid, drop */ > - goto drop; > - > - case 0: > - /* ASCII, copy */ > - ubuf[out++] = c; > - continue; > + for (in = 0, 
out = 0; in < bytes; in += consumed) { > + s32 value; > > - default: > - /* 2..6-byte UTF-8 */ > + value = synth_utf8_get(buf + in, bytes - in, &consumed, &want); > + if (value == -1) { > + /* Invalid or incomplete */ > > - if (bytes - in < nbytes) { > + if (want > bytes - in) > /* We don't have it all yet, stop here > * and wait for the rest > */ > bytes = in; > - want = nbytes; > - continue; > - } > - > - /* First byte */ > - value = c & ((1u << (7 - nbytes)) - 1); > - > - /* Other bytes */ > - for (in2 = 2; in2 <= nbytes; in2++) { > - c = buf[in + 1]; > - if ((c & 0xc0) != 0x80) { > - /* Invalid, drop the head */ > - want = 1; > - goto drop; > - } > - value = (value << 6) | (c & 0x3f); > - in++; > - } > - > - if (value < 0x10000) > - ubuf[out++] = value; > - want = 1; > - break; > + > + continue; > } > -drop: > - ; > + > + if (value < 0x10000) > + ubuf[out++] = value; > } > > count -= bytes; > diff --git a/drivers/accessibility/speakup/speakup.h b/drivers/accessibility/speakup/speakup.h > index 364fde99749e..54f1226ea061 100644 > --- a/drivers/accessibility/speakup/speakup.h > +++ b/drivers/accessibility/speakup/speakup.h > @@ -76,7 +76,9 @@ int speakup_paste_selection(struct tty_struct *tty); > void speakup_cancel_paste(void); > void speakup_register_devsynth(void); > void speakup_unregister_devsynth(void); > +s32 synth_utf8_get(const char *buf, size_t count, size_t *consumed, size_t *want); > void synth_write(const char *buf, size_t count); > +void synth_writeu(const char *buf, size_t count); > int synth_supports_indexing(void); > > extern struct vc_data *spk_sel_cons; > diff --git a/drivers/accessibility/speakup/synth.c b/drivers/accessibility/speakup/synth.c > index eea2a2fa4f01..c6339758fa67 100644 > --- a/drivers/accessibility/speakup/synth.c > +++ b/drivers/accessibility/speakup/synth.c > @@ -215,10 +215,95 @@ void synth_write(const char *buf, size_t count) > synth_start(); > } > > +/* Consume one utf-8 character from buf (that contains up to count 
bytes), > + * returns the unicode codepoint if valid, -1 otherwise. > + * In all cases, returns the number of consumed bytes in *consumed, > + * and the minimum number of bytes that would be needed for the next character > + * in *want. > + */ > +s32 synth_utf8_get(const char *buf, size_t count, size_t *consumed, size_t *want) > +{ > + unsigned char c = buf[0]; > + int nbytes = 8 - fls(c ^ 0xff); > + u32 value; > + size_t i; > + > + switch (nbytes) { > + case 8: /* 0xff */ > + case 7: /* 0xfe */ > + case 1: /* 0x80 */ > + /* Invalid, drop */ > + *consumed = 1; > + *want = 1; > + return -1; > + > + case 0: > + /* ASCII, take as such */ > + *consumed = 1; > + *want = 1; > + return c; > + > + default: > + /* 2..6-byte UTF-8 */ > + > + if (count < nbytes) { > + /* We don't have it all */ > + *consumed = 0; > + *want = nbytes; > + return -1; > + } > + > + /* First byte */ > + value = c & ((1u << (7 - nbytes)) - 1); > + > + /* Other bytes */ > + for (i = 1; i < nbytes; i++) { > + c = buf[i]; > + if ((c & 0xc0) != 0x80) { > + /* Invalid, drop the head */ > + *consumed = i; > + *want = 1; > + return -1; > + } > + value = (value << 6) | (c & 0x3f); > + } > + > + *consumed = nbytes; > + *want = 1; > + return value; > + } > +} > + > +void synth_writeu(const char *buf, size_t count) > +{ > + size_t i, consumed, want; > + > + /* Convert to u16 */ > + for (i = 0; i < count; i++) { > + s32 value; > + > + value = synth_utf8_get(buf + i, count - i, &consumed, &want); > + if (value == -1) { > + /* Invalid or incomplete */ > + > + if (want > count - i) > + /* We don't have it all, stop */ > + count = i; > + > + continue; > + } > + > + if (value < 0x10000) > + synth_buffer_add(value); > + } > + > + synth_start(); > +} > + > void synth_printf(const char *fmt, ...) > { > va_list args; > - unsigned char buf[160], *p; > + unsigned char buf[160]; > int r; > > va_start(args, fmt); > @@ -227,10 +312,7 @@ void synth_printf(const char *fmt, ...) 
> if (r > sizeof(buf) - 1) > r = sizeof(buf) - 1; > > - p = buf; > - while (r--) > - synth_buffer_add(*p++); > - synth_start(); > + synth_writeu(buf, r); > } > EXPORT_SYMBOL_GPL(synth_printf); >