fc-blanks/fc-blanks.py | 21 ++++++- fc-blanks/list-unicodeset.html | 119 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 3 deletions(-) New commits: commit 600110ee8c3e9bdd18cd5bc27555d1f1114e4880 Author: Akira TAGOH <akira@xxxxxxxxx> Date: Thu Jun 9 14:22:31 2016 +0900 Add the static raw data to generate fcblanks.h https://bugs.freedesktop.org/show_bug.cgi?id=91406 diff --git a/fc-blanks/fc-blanks.py b/fc-blanks/fc-blanks.py index 81b07d2..b88a0aa 100755 --- a/fc-blanks/fc-blanks.py +++ b/fc-blanks/fc-blanks.py @@ -4,12 +4,27 @@ from __future__ import absolute_import from __future__ import print_function import urllib2 import sys +import os from lxml import html from six.moves import range -fp = urllib2.urlopen('http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3AGC%3DZs%3A][%3ADI%3A]&abb=on&ucd=on&esc=on&g') -data = fp.read() -fp.close() +datafile = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'list-unicodeset.html') +try: + fp = urllib2.urlopen('http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3AGC%3DZs%3A][%3ADI%3A]&abb=on&ucd=on&esc=on&g') + data = fp.read() + fp.close() + fp = open(datafile, 'w'); + fp.write(data); + fp.close(); +except urllib2.URLError: + # fall back reading the static data in repo + try: + fp = open(datafile) + data = fp.read() + fp.close() + except IOError: + sys.stderr.write("Error: No static data to generate the blank data. please make sure the network connection is reachable to Unicode.org\n") + sys.exit(1) dom = html.fromstring(data) x = dom.xpath('/html/body/form/p/text()') diff --git a/fc-blanks/list-unicodeset.html b/fc-blanks/list-unicodeset.html new file mode 100644 index 0000000..6e95efa --- /dev/null +++ b/fc-blanks/list-unicodeset.html @@ -0,0 +1,119 @@ +<html> +<head> +<meta http-equiv="Content-Language" content="en-us"> +<meta name="GENERATOR" content="Microsoft FrontPage 6.0"> +<meta name="ProgId" content="FrontPage.Editor.Document"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> +<link rel="stylesheet" type="text/css" href="index.css"> + + + + + + + + + + + + + + + + + + + +<title>Unicode Utilities: UnicodeSet</title> +</head> +<body> + +<h1>Unicode Utilities: UnicodeSet </h1> +<p><a target="help" href="http://cldr.unicode.org/unicode-utilities/list-unicodeset"><b>help</b></a> | <a target="character" href="character.jsp">character</a> + | <a target="properties" href="properties.jsp">properties</a> + | <a target="confusables" href="confusables.jsp">confusables</a> + | <a target="list" href="list-unicodeset.jsp">unicode-set</a> + | <a target="compare" href="unicodeset.jsp">compare-sets</a> + | <a target="regex" href="regex.jsp">regex</a> + | <a target="bnf" href="bnf.jsp">bnf-regex</a> + | <a target="breaks" href="breaks.jsp">breaks</a> + | <a target="transform" href="transform.jsp">transform</a> + | <a target="bidi" href="bidi.jsp">bidi</a> + | <a target="idna" href="idna.jsp">idna</a> + | <a target="languageid" href="languageid.jsp">languageid</a></p> +<form name="myform"> + <table border="1" cellpadding="0" cellspacing="0" style="border-collapse: collapse; width:100%"> + <tr> + <th style="width: 50%">Input</th> + </tr> + <tr> + <td><textarea name="a" rows="8" cols="10" style="width: 100%">[:GC=Zs:][:DI:]</textarea></td> + </tr> + <tr> + <td> + <input id='main' type="submit" value="Show Set" onClick="window.location.href='list-unicodeset.jsp?a='+document.getElementById('main').value"/> + <input type="checkbox" checked name="abb"><label for="abb">Abbreviate</label> + <input type="checkbox" name="c"><label for="c">Collate</label> + <input type="checkbox" checked name="ucd"><label for="ucd">UCD format</label> + <input type="checkbox" checked name="esc"><label for="esc">Escape</label> + <label for="g">Group by:</label> + <input type="text" checked name="g" size="25" value=""> + <label for="i">Info:</label> + <input type="text" checked name="i" size="25" value=""> + </td> + </tr> +</table> + <p>4,190 Code Points</p> + <hr> + <p>[\ \u00A0\u00AD\u034F\u061C\u115F\u1160\u1680\u17B4\u17B5\u180B-\u180E\u2000-\u200F\u202A-\u202F\u205F-\u206F\u3000\u3164\uFE00-\uFE0F\uFEFF\uFFA0\uFFF0-\uFFF8\U0001BCA0-\U0001BCA3\U0001D173-\U0001D17A\U000E0000-\U000E0FFF]</p> + <hr> + <table width='100%'><tr><td colSpan='4'><tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=0020'>0020</a></code></td><td>SPACE</td></tr> +<tr><td class='charCell' width='3m'>   </td><td width='7m'><code><a target='c' href='character.jsp?a=00A0'>00A0</a></code></td><td>NO-BREAK SPACE</td></tr> +<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=00AD'>00AD</a></code></td><td>SOFT HYPHEN</td></tr> +<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=034F'>034F</a></code></td><td>COMBINING GRAPHEME JOINER</td></tr> +<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=061C'>061C</a></code></td><td>ARABIC LETTER MARK</td></tr> +<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=115F'>115F</a></code></td><td>HANGUL CHOSEONG FILLER</td></tr> +<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=1160'>1160</a></code></td><td>HANGUL JUNGSEONG FILLER</td></tr> +<tr><td class='charCell' width='3m'> á?? </td><td width='7m'><code><a target='c' href='character.jsp?a=1680'>1680</a></code></td><td>OGHAM SPACE MARK</td></tr> +<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=17B4'>17B4</a></code></td><td>KHMER VOWEL INHERENT AQ</td></tr> +<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=17B5'>17B5</a></code></td><td>KHMER VOWEL INHERENT AA</td></tr> +<code><a target='c' href='character.jsp?a=180B'>180B</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=180E'>180E</a></code></td><td>MONGOLIAN VOWEL SEPARATOR</td></tr> +<code><a target='c' href='character.jsp?a=2000'>2000</a></code>..<tr><td class='charCell' width='3m'> â??â??â?? </td><td width='7m'><code><a target='c' href='character.jsp?a=200F'>200F</a></code></td><td>RIGHT-TO-LEFT MARK</td></tr> +<code><a target='c' href='character.jsp?a=202A'>202A</a></code>..<tr><td class='charCell' width='3m'> â?¯Â </td><td width='7m'><code><a target='c' href='character.jsp?a=202F'>202F</a></code></td><td>NARROW NO-BREAK SPACE</td></tr> +<code><a target='c' href='character.jsp?a=205F'>205F</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=206F'>206F</a></code></td><td>NOMINAL DIGIT SHAPES</td></tr> +<tr><td class='charCell' width='3m'> ã?? </td><td width='7m'><code><a target='c' href='character.jsp?a=3000'>3000</a></code></td><td>IDEOGRAPHIC SPACE</td></tr> +<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=3164'>3164</a></code></td><td>HANGUL FILLER</td></tr> +<code><a target='c' href='character.jsp?a=FE00'>FE00</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=FE0F'>FE0F</a></code></td><td>VARIATION SELECTOR-16</td></tr> +<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=FEFF'>FEFF</a></code></td><td>ZERO WIDTH NO-BREAK SPACE</td></tr> +<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=FFA0'>FFA0</a></code></td><td>HALFWIDTH HANGUL FILLER</td></tr> +<code><a target='c' href='character.jsp?a=FFF0'>FFF0</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=FFF8'>FFF8</a></code></td><td><i><unassigned-FFF8></i></td></tr> +<code><a target='c' href='character.jsp?a=1BCA0'>1BCA0</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=1BCA3'>1BCA3</a></code></td><td>SHORTHAND FORMAT UP STEP</td></tr> +<code><a target='c' href='character.jsp?a=1D173'>1D173</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=1D17A'>1D17A</a></code></td><td>MUSICAL SYMBOL END PHRASE</td></tr> +<code><a target='c' href='character.jsp?a=E0000'>E0000</a></code>..<tr><td class='charCell' width='3m'>  </td><td width='7m'><code><a target='c' href='character.jsp?a=E0FFF'>E0FFF</a></code></td><td><i><unassigned-E0FFF></i></td></tr> +</td></tr></table> +</form> +<hr> +<p style="font-size:80%"><b><a name="fonts">Fonts and Display.</a></b> If you don't have a good set of Unicode fonts (and modern browser), +you may not be able to read some of the characters. +Some suggested fonts that you can add for coverage are: +<a href="http://greekfonts.teilar.gr/" target="_blank">Unicode Fonts for Ancient Scripts</a>, +<a href="https://www.google.com/get/noto/" target="_blank">Noto Fonts site</a>, +<a href="http://www.alanwood.net/unicode/fonts.html" target="_blank">Large, multi-script Unicode fonts</a>. +See also: <a href="http://www.unicode.org/help/display_problems.html" target="_blank">Unicode Display Problems</a>.</p> +<p style="font-size:80%">Version 3.7; +ICU version: 57.0.1.0; +Unicode version: 8.0.0.0 +</p> +<script type="text/javascript"> +var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www."); +document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E")); +</script> +<script type="text/javascript"> +try { +var pageTracker = _gat._getTracker("UA-8314904-1"); +pageTracker._trackPageview(); +} catch(err) {} +</script> +<hr> +</body> +</html>
_______________________________________________ Fontconfig mailing list Fontconfig@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/fontconfig