On Wed, Dec 05, 2012 at 07:00:41PM +0200, Zeeshan Ali (Khattak) wrote: > On Mon, Dec 3, 2012 at 1:23 PM, Christophe Fergeau <cfergeau@xxxxxxxxxx> wrote: > > Now that libosinfo has an osinfo_db_identify_media method which > > modifies the media it was passed, we can generate properties which > > need information from the media stored in the OsinfoDB, and > > information from the actual media (ISO volume ID). > > This is useful to guess what languages are supported by a given > > Windows ISO: the end of the ISO volume ID has a language code, which > > we can translate to a locale identifier. > > > > This commit adds a lang-regex property to the OsinfoDB database to > > extract the language code from Windows ISO volume IDs, and > > then adds mapping tables to turn it into a locale identifier. > > --- > > data/oses/windows.xml.in | 2 + > > data/schemas/libosinfo.rng | 5 ++ > > osinfo/libosinfo.syms | 4 +- > > osinfo/osinfo_db.c | 177 +++++++++++++++++++++++++++++++++++++++++++++ > > osinfo/osinfo_loader.c | 4 +- > > osinfo/osinfo_media.c | 67 ++++++++++++++++- > > osinfo/osinfo_media.h | 3 + > > 7 files changed, 258 insertions(+), 4 deletions(-) > > > > diff --git a/data/oses/windows.xml.in b/data/oses/windows.xml.in > > index d09e873..e8c29f9 100644 > > --- a/data/oses/windows.xml.in > > +++ b/data/oses/windows.xml.in > > @@ -739,12 +739,14 @@ > > <iso> > > <volume-id>(HB1_CCPA_X86FRE|HRM_CCSA_X86FRE|HRM_CCSA_X86CHK|HRM_CCSNA_X86CHK|HRM_CCSNA_X86FRE|HRM_CENA_X86FREV|HRM_CENA_X86CHKV|HRM_CENNA_X86FREV|HRM_CENNA_X86CHKV|HRM_CPRA_X86FREV|HRM_CPRNA_X86FREV)_</volume-id> > > <publisher-id>MICROSOFT CORPORATION</publisher-id> > > + <lang-regex>[[:upper:][:digit:]_]*_([[:upper:]]*-[[:upper:]]*)</lang-regex> > > </iso> > > </media> > > <media arch="x86_64"> > > <iso> > > 
<volume-id>(HB1_CCPA_X64FRE|HRM_CCSA_X64FRE|HRM_CCSA_X64CHK|HRM_CCSNA_X64FRE|HRM_CCSNA_X64CHK|HRM_CENNA_X64FREV|HRM_CENNA_X64CHKV|HRM_CENA_X64FREV|HRM_CENA_X64CHKV|HRM_CPRA_X64FREV|HRM_CPRNA_X64FREV)_</volume-id> > > <publisher-id>MICROSOFT CORPORATION</publisher-id> > > + <lang-regex>[[:upper:][:digit:]_]*_([[:upper:]]*-[[:upper:]]*)</lang-regex> > > </iso> > > </media> > > > > diff --git a/data/schemas/libosinfo.rng b/data/schemas/libosinfo.rng > > index 87635dd..36fa1a1 100644 > > --- a/data/schemas/libosinfo.rng > > +++ b/data/schemas/libosinfo.rng > > @@ -281,6 +281,11 @@ > > <text/> > > </element> > > </optional> > > + <optional> > > + <element name='lang-regex'> > > + <text/> > > + </element> > > + </optional> > > </interleave> > > </element> > > </define> > > diff --git a/osinfo/libosinfo.syms b/osinfo/libosinfo.syms > > index d45e58e..7c3efe1 100644 > > --- a/osinfo/libosinfo.syms > > +++ b/osinfo/libosinfo.syms > > @@ -341,11 +341,11 @@ LIBOSINFO_0.2.2 { > > osinfo_install_config_set_target_disk; > > osinfo_install_config_get_script_disk; > > osinfo_install_config_set_script_disk; > > - > > osinfo_install_script_get_avatar_format; > > osinfo_install_script_get_path_format; > > - > > osinfo_install_script_get_product_key_format; > > + > > + osinfo_media_get_languages; > > } LIBOSINFO_0.2.1; > > > > /* Symbols in next release... 
> > diff --git a/osinfo/osinfo_db.c b/osinfo/osinfo_db.c > > index 46101d6..2c2eb5a 100644 > > --- a/osinfo/osinfo_db.c > > +++ b/osinfo/osinfo_db.c > > @@ -38,6 +38,177 @@ G_DEFINE_TYPE (OsinfoDb, osinfo_db, G_TYPE_OBJECT); > > (((str) != NULL) && \ > > g_regex_match_simple((pattern), (str), 0, 0))) > > > > +static gchar *get_raw_lang(const char *volume_id, const gchar *regex_str) > > +{ > > + GRegex *regex; > > + GMatchInfo *match; > > + gboolean matched; > > + gchar *raw_lang = NULL; > > + > > + regex = g_regex_new(regex_str, G_REGEX_ANCHORED, > > + G_REGEX_MATCH_ANCHORED, NULL); > > + if (regex == NULL) > > + return NULL; > > + > > + matched = g_regex_match(regex, volume_id, G_REGEX_MATCH_ANCHORED, &match); > > + if (!matched || !g_match_info_matches(match)) > > + goto end; > > + raw_lang = g_match_info_fetch(match, 1); > > + if (raw_lang == NULL) > > + goto end; > > + > > +end: > > + g_match_info_unref(match); > > + g_regex_unref(regex); > > + > > + return raw_lang; > > +} > > + > > +struct LanguageMapping { > > + const char *iso_label_lang; > > + const char *gettext_lang; > > +}; > > + > > +static GHashTable *init_win_lang_map(void) > > +{ > > + GHashTable *lang_map; > > + const struct LanguageMapping lang_table[] = { > > + /* ISO label strings up to Windows 7 */ > > + { "EN", "en_US" }, > > + { "AR", "ar_SA" }, > > + { "BG", "bg_BG" }, > > + { "HK", "zh_HK" }, > > + { "CN", "zh_CN" }, > > + { "TW", "zh_TW" }, > > + { "HR", "hr_HR" }, > > + { "CS", "cs_CZ" }, > > + { "DA", "da_DK" }, > > + { "NL", "nl_NL" }, > > + { "ET", "et_EE" }, > > + { "FI", "fi_FI" }, > > + { "FR", "fr_FR" }, > > + { "DE", "de_DE" }, > > + { "EL", "el_GR" }, > > + { "HE", "he_IL" }, > > + { "HU", "hu_HU" }, > > + { "IT", "it_IT" }, > > + { "JA", "ja_JP" }, > > + { "KO", "ko_KR" }, > > + { "LV", "lv_LV" }, > > + { "LT", "lt_LT" }, > > + { "NO", "nb_NO" }, > > + { "PL", "pl_PL" }, > > + { "BR", "pt_BR" }, > > + { "PT", "pt_PT" }, > > + { "RO", "ro_RO" }, > > + { "RU", "ru_RU" }, > > + { 
"SRL", "sr_RS@latin" }, > > + { "SK", "sk_SK" }, > > + { "SL", "sl_SI" }, > > + { "ES", "es_ES" }, > > + { "SV", "sv_SE" }, > > + { "TH", "th_TH" }, > > + { "TR", "tr_TR" }, > > + { "UK", "uk_UA" }, > > + > > + /* starting from Windows 8, the ISO label contains both > > + * language and country code */ > > + { "EN-US", "en_US" }, > > + { "EN-GB", "en_GB" }, > > + { "AR-SA", "ar_SA" }, > > + { "BG-BG", "bg_BG" }, > > + { "ZH-HK", "zh_HK" }, > > + { "ZH-CN", "zh_CN" }, > > + { "ZH-TW", "zh_TW" }, > > + { "HR-HR", "hr_HR" }, > > + { "CS-CZ", "cs_CZ" }, > > + { "DA-DK", "da_DK" }, > > + { "NL-NL", "nl_NL" }, > > + { "ET-EE", "et_EE" }, > > + { "FI-FI", "fi_FI" }, > > + { "FR-FR", "fr_FR" }, > > + { "DE-DE", "de_DE" }, > > + { "EL-GR", "el_GR" }, > > + { "HE-IL", "he_IL" }, > > + { "HU-HU", "hu_HU" }, > > + { "IT-IT", "it_IT" }, > > + { "JA-JP", "ja_JP" }, > > + { "KO-KR", "ko_KR" }, > > + { "LV-LV", "lv_LV" }, > > + { "LT-LT", "lt_LT" }, > > + { "NB-NO", "nb_NO" }, > > + { "PL-PL", "pl_PL" }, > > + { "PT-BR", "pt_BR" }, > > + { "PT-PT", "pt_PT" }, > > + { "RO-RO", "ro_RO" }, > > + { "RU-RU", "ru_RU" }, > > + { "SR-LATN-CS", "sr_RS@latin" }, > > + { "SK-SK", "sk_SK" }, > > + { "SL-SI", "sl_SI" }, > > + { "ES-ES", "es_ES" }, > > + { "SV-SE", "sv_SE" }, > > + { "TH-TH", "th_TH" }, > > + { "TR-TR", "tr_TR" }, > > + { "UK-UA", "uk_UA" }, > > + > > + { "EU-ES", "eu_ES" }, //language pack > > + { "CA-ES", "ca_ES" }, //language pack > > + { "GL-ES", "gl_ES" }, //language pack > > + { "KY-KG", "ky_KG" }, //language pack > > + > > + { NULL, NULL } > > + }; > > Seems all of these except for 1 can be covered by a simple 's/-/_/' > conversion and thus do not need all this hard coding. I prefer an explicit list of the languages we expect to find, especially as there are already 2 different formats. Christophe
Attachment:
pgp53IAzms3yC.pgp
Description: PGP signature
_______________________________________________ virt-tools-list mailing list virt-tools-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/virt-tools-list