Re: [PATCH] speakup: Turn i18n files utf-8

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



And these patches are making the headlines somewhere :)
https://www.phoronix.com/news/Linux-6.9-Speakup-Driver

Cheers,
Didier


Le 23/03/2024 à 17:42, Samuel Thibault a écrit :
> i18n currently assumes latin1 encoding, which is not enough for most
> languages.
> 
> This separates out the utf-8 processing of /dev/synthu, and uses it for
> a new synth_writeu, which we make synth_printf now use. This has the
> effect of making all the i18n messages processed in utf-8.
> 
> Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxx>
> ---
>  drivers/accessibility/speakup/devsynth.c | 59 ++++-----------
>  drivers/accessibility/speakup/speakup.h  |  2 +
>  drivers/accessibility/speakup/synth.c    | 92 ++++++++++++++++++++++--
>  3 files changed, 102 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/accessibility/speakup/devsynth.c b/drivers/accessibility/speakup/devsynth.c
> index 674204ee5a85..e3d909bd0480 100644
> --- a/drivers/accessibility/speakup/devsynth.c
> +++ b/drivers/accessibility/speakup/devsynth.c
> @@ -39,13 +39,13 @@ static ssize_t speakup_file_write(struct file *fp, const char __user *buffer,
>  static ssize_t speakup_file_writeu(struct file *fp, const char __user *buffer,
>  				   size_t nbytes, loff_t *ppos)
>  {
> -	size_t count = nbytes, want;
> +	size_t count = nbytes, consumed, want;
>  	const char __user *ptr = buffer;
>  	size_t bytes;
>  	unsigned long flags;
>  	unsigned char buf[256];
>  	u16 ubuf[256];
> -	size_t in, in2, out;
> +	size_t in, out;
>  
>  	if (!synth)
>  		return -ENODEV;
> @@ -58,57 +58,24 @@ static ssize_t speakup_file_writeu(struct file *fp, const char __user *buffer,
>  			return -EFAULT;
>  
>  		/* Convert to u16 */
> -		for (in = 0, out = 0; in < bytes; in++) {
> -			unsigned char c = buf[in];
> -			int nbytes = 8 - fls(c ^ 0xff);
> -			u32 value;
> -
> -			switch (nbytes) {
> -			case 8: /* 0xff */
> -			case 7: /* 0xfe */
> -			case 1: /* 0x80 */
> -				/* Invalid, drop */
> -				goto drop;
> -
> -			case 0:
> -				/* ASCII, copy */
> -				ubuf[out++] = c;
> -				continue;
> +		for (in = 0, out = 0; in < bytes; in += consumed) {
> +			s32 value;
>  
> -			default:
> -				/* 2..6-byte UTF-8 */
> +			value = synth_utf8_get(buf + in, bytes - in, &consumed, &want);
> +			if (value == -1) {
> +				/* Invalid or incomplete */
>  
> -				if (bytes - in < nbytes) {
> +				if (want > bytes - in)
>  					/* We don't have it all yet, stop here
>  					 * and wait for the rest
>  					 */
>  					bytes = in;
> -					want = nbytes;
> -					continue;
> -				}
> -
> -				/* First byte */
> -				value = c & ((1u << (7 - nbytes)) - 1);
> -
> -				/* Other bytes */
> -				for (in2 = 2; in2 <= nbytes; in2++) {
> -					c = buf[in + 1];
> -					if ((c & 0xc0) != 0x80)	{
> -						/* Invalid, drop the head */
> -						want = 1;
> -						goto drop;
> -					}
> -					value = (value << 6) | (c & 0x3f);
> -					in++;
> -				}
> -
> -				if (value < 0x10000)
> -					ubuf[out++] = value;
> -				want = 1;
> -				break;
> +
> +				continue;
>  			}
> -drop:
> -			;
> +
> +			if (value < 0x10000)
> +				ubuf[out++] = value;
>  		}
>  
>  		count -= bytes;
> diff --git a/drivers/accessibility/speakup/speakup.h b/drivers/accessibility/speakup/speakup.h
> index 364fde99749e..54f1226ea061 100644
> --- a/drivers/accessibility/speakup/speakup.h
> +++ b/drivers/accessibility/speakup/speakup.h
> @@ -76,7 +76,9 @@ int speakup_paste_selection(struct tty_struct *tty);
>  void speakup_cancel_paste(void);
>  void speakup_register_devsynth(void);
>  void speakup_unregister_devsynth(void);
> +s32 synth_utf8_get(const char *buf, size_t count, size_t *consumed, size_t *want);
>  void synth_write(const char *buf, size_t count);
> +void synth_writeu(const char *buf, size_t count);
>  int synth_supports_indexing(void);
>  
>  extern struct vc_data *spk_sel_cons;
> diff --git a/drivers/accessibility/speakup/synth.c b/drivers/accessibility/speakup/synth.c
> index eea2a2fa4f01..c6339758fa67 100644
> --- a/drivers/accessibility/speakup/synth.c
> +++ b/drivers/accessibility/speakup/synth.c
> @@ -215,10 +215,95 @@ void synth_write(const char *buf, size_t count)
>  	synth_start();
>  }
>  
> +/* Consume one utf-8 character from buf (that contains up to count bytes),
> + * returns the unicode codepoint if valid, -1 otherwise.
> + * In all cases, returns the number of consumed bytes in *consumed,
> + * and the minimum number of bytes that would be needed for the next character
> + * in *want.
> + */
> +s32 synth_utf8_get(const char *buf, size_t count, size_t *consumed, size_t *want)
> +{
> +	unsigned char c = buf[0];
> +	int nbytes = 8 - fls(c ^ 0xff);
> +	u32 value;
> +	size_t i;
> +
> +	switch (nbytes) {
> +	case 8: /* 0xff */
> +	case 7: /* 0xfe */
> +	case 1: /* 0x80 */
> +		/* Invalid, drop */
> +		*consumed = 1;
> +		*want = 1;
> +		return -1;
> +
> +	case 0:
> +		/* ASCII, take as such */
> +		*consumed = 1;
> +		*want = 1;
> +		return c;
> +
> +	default:
> +		/* 2..6-byte UTF-8 */
> +
> +		if (count < nbytes) {
> +			/* We don't have it all */
> +			*consumed = 0;
> +			*want = nbytes;
> +			return -1;
> +		}
> +
> +		/* First byte */
> +		value = c & ((1u << (7 - nbytes)) - 1);
> +
> +		/* Other bytes */
> +		for (i = 1; i < nbytes; i++) {
> +			c = buf[i];
> +			if ((c & 0xc0) != 0x80)	{
> +				/* Invalid, drop the head */
> +				*consumed = i;
> +				*want = 1;
> +				return -1;
> +			}
> +			value = (value << 6) | (c & 0x3f);
> +		}
> +
> +		*consumed = nbytes;
> +		*want = 1;
> +		return value;
> +	}
> +}
> +
> +void synth_writeu(const char *buf, size_t count)
> +{
> +	size_t i, consumed, want;
> +
> +	/* Convert to u16 */
> +	for (i = 0; i < count; i++) {
> +		s32 value;
> +
> +		value = synth_utf8_get(buf + i, count - i, &consumed, &want);
> +		if (value == -1) {
> +			/* Invalid or incomplete */
> +
> +			if (want > count - i)
> +				/* We don't have it all, stop */
> +				count = i;
> +
> +			continue;
> +		}
> +
> +		if (value < 0x10000)
> +			synth_buffer_add(value);
> +	}
> +
> +	synth_start();
> +}
> +
>  void synth_printf(const char *fmt, ...)
>  {
>  	va_list args;
> -	unsigned char buf[160], *p;
> +	unsigned char buf[160];
>  	int r;
>  
>  	va_start(args, fmt);
> @@ -227,10 +312,7 @@ void synth_printf(const char *fmt, ...)
>  	if (r > sizeof(buf) - 1)
>  		r = sizeof(buf) - 1;
>  
> -	p = buf;
> -	while (r--)
> -		synth_buffer_add(*p++);
> -	synth_start();
> +	synth_writeu(buf, r);
>  }
>  EXPORT_SYMBOL_GPL(synth_printf);
>  




[Index of Archives]     [Linux for the Blind]     [Fedora Discussion]     [Linux Kernel]     [Yosemite News]     [Big List of Linux Books]

  Powered by Linux