Change the encoding of strings returned by usb_string() from ISO 8859-1 to UTF-8. Signed-off-by: Clemens Ladisch <clemens@xxxxxxxxxx> --- linux-2.6.orig/drivers/usb/Kconfig +++ linux-2.6/drivers/usb/Kconfig @@ -62,6 +62,7 @@ config USB_ARCH_HAS_EHCI config USB tristate "Support for Host-side USB" depends on USB_ARCH_HAS_HCD + select NLS # for UTF-8 strings ---help--- Universal Serial Bus (USB) is a specification for a serial bus subsystem which offers higher speeds and more features than the --- linux-2.6.orig/drivers/usb/core/message.c +++ linux-2.6/drivers/usb/core/message.c @@ -10,6 +10,7 @@ #include <linux/mm.h> #include <linux/timer.h> #include <linux/ctype.h> +#include <linux/nls.h> #include <linux/device.h> #include <linux/scatterlist.h> #include <linux/usb/quirks.h> @@ -759,7 +760,7 @@ static int usb_string_sub(struct usb_dev } /** - * usb_string - returns ISO 8859-1 version of a string descriptor + * usb_string - returns UTF-8 version of a string descriptor * @dev: the device whose string descriptor is being retrieved * @index: the number of the descriptor * @buf: where to put the string @@ -767,17 +768,10 @@ static int usb_string_sub(struct usb_dev * Context: !in_interrupt () * * This converts the UTF-16LE encoded strings returned by devices, from - * usb_get_string_descriptor(), to null-terminated ISO-8859-1 encoded ones - * that are more usable in most kernel contexts. Note that all characters - * in the chosen descriptor that can't be encoded using ISO-8859-1 - * are converted to the question mark ("?") character, and this function + * usb_get_string_descriptor(), to null-terminated UTF-8 encoded ones + * that are more usable in most kernel contexts. Note that this function * chooses strings in the first language supported by the device. * - * The ASCII (or, redundantly, "US-ASCII") character set is the seven-bit - * subset of ISO 8859-1. ISO-8859-1 is the eight-bit subset of Unicode, - * and is appropriate for use many uses of English and several other - * Western European languages. (But it doesn't include the "Euro" symbol.) - * * This call is synchronous, and may not be used in an interrupt context. * * Returns length of the string (>= 0) or usb_control_msg status (< 0). @@ -786,14 +780,14 @@ int usb_string(struct usb_device *dev, i { unsigned char *tbuf; int err; - unsigned int u, idx; + unsigned int u; if (dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; if (size <= 0 || !buf || !index) return -EINVAL; buf[0] = 0; - tbuf = kmalloc(256, GFP_NOIO); + tbuf = kmalloc(256 + 2, GFP_NOIO); if (!tbuf) return -ENOMEM; @@ -822,17 +816,13 @@ int usb_string(struct usb_device *dev, i if (err < 0) goto errout; + for (u = 2; u < err; u += 2) + le16_to_cpus((u16 *)&tbuf[u]); + tbuf[u] = 0; + tbuf[u + 1] = 0; size--; /* leave room for trailing NULL char in output buffer */ - for (idx = 0, u = 2; u < err; u += 2) { - if (idx >= size) - break; - if (tbuf[u+1]) /* high byte */ - buf[idx++] = '?'; /* non ISO-8859-1 character */ - else - buf[idx++] = tbuf[u]; - } - buf[idx] = 0; - err = idx; + err = utf8_wcstombs(buf, (u16 *)&tbuf[2], size); + buf[err] = 0; if (tbuf[1] != USB_DT_STRING) dev_dbg(&dev->dev, @@ -845,6 +835,9 @@ int usb_string(struct usb_device *dev, i } EXPORT_SYMBOL_GPL(usb_string); +/* one UTF-8-encoded 16-bit character has at most three bytes */ +#define MAX_USB_STRING_SIZE (127 * 3 + 1) + /** * usb_cache_string - read a string descriptor and cache it for later use * @udev: the device whose string descriptor is being read @@ -862,9 +855,9 @@ char *usb_cache_string(struct usb_device if (index <= 0) return NULL; - buf = kmalloc(256, GFP_KERNEL); + buf = kmalloc(MAX_USB_STRING_SIZE, GFP_KERNEL); if (buf) { - len = usb_string(udev, index, buf, 256); + len = usb_string(udev, index, buf, MAX_USB_STRING_SIZE); if (len > 0) { smallbuf = kmalloc(++len, GFP_KERNEL); if (!smallbuf) -- To unsubscribe from this list: send the line "unsubscribe linux-usb" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html