forked from msteveb/jimtcl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse-unidata.tcl
53 lines (49 loc) · 1.14 KB
/
parse-unidata.tcl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env tclsh
# Generate UTF-8 case mapping tables
#
# (c) 2010 Steve Bennett <steveb@workware.net.au>
#
# See LICENCE for licence details.
#/
# Parse the unicode data from: http://unicode.org/Public/UNIDATA/UnicodeData.txt
# to generate case mapping tables
# Read the UnicodeData.txt file named by the first command-line argument and
# collect the case mappings of letters whose code point is in 0x80..0xffff.
#
# Result: the array "map" with elements upper, lower and title. Each element
# is a flat list of alternating {code mapped-code} values, written as
# lowercase hex strings with a 0x prefix.
set map(lower) {}
set map(upper) {}
set map(title) {}

# Report a missing argument explicitly rather than failing inside [open]
# with an obscure 'couldn't open ""' error.
if {[llength $argv] < 1} {
    puts stderr "Usage: $argv0 UnicodeData.txt"
    exit 1
}

set f [open [lindex $argv 0]]
while {[gets $f buf] >= 0} {
    set fields [split $buf ";"]
    if {[llength $fields] == 0} {
        # Blank line: the foreach below would not run at all, leaving $code
        # unset (an error on the first line) or stale from the previous record.
        continue
    }

    set title ""
    set lower ""
    set upper ""
    # Take the first three fields (code, name, class), discard the next nine
    # into x, and take the last three as the upper/lower/title mappings.
    # The single-iteration foreach + break is used as a list-unpacking idiom.
    foreach {code name class x x x x x x x x x upper lower title} $fields break

    set codex [string tolower 0x$code]
    if {$codex <= 0x7f} {
        # Code points up to 0x7f are not included in the tables
        continue
    }
    if {$codex > 0xffff} {
        # Stop at the first code point above 0xffff; the rest of the file is
        # not read (presumably the input is sorted by code point - confirm).
        break
    }
    if {![string match L* $class]} {
        # Only records whose class field starts with "L" are considered
        continue
    }

    if {$upper ne ""} {
        lappend map(upper) $codex [string tolower 0x$upper]
    }
    if {$lower ne ""} {
        lappend map(lower) $codex [string tolower 0x$lower]
    }
    # A title mapping is only recorded when it differs from the upper mapping
    if {$title ne "" && $title ne $upper} {
        if {$title eq $code} {
            # A character that is its own title form is recorded as 0 (0x0)
            set title 0
        }
        lappend map(title) $codex [string tolower 0x$title]
    }
}
close $f
# Write one C array definition per mapping kind to stdout, in the order
# upper, lower, title. Each array is named unicode_case_mapping_<kind> and
# holds one "{ code, mapped }" initialiser per pair stored in map(<kind>).
# NOTE(review): "struct casemap" is not defined here; presumably it is
# declared by the C code that includes this output - confirm.
foreach kind {upper lower title} {
    # Build the whole table as a list of lines, then print it in one go
    set rows [list "static const struct casemap unicode_case_mapping_${kind}\[\] = \{"]
    foreach {from to} $map($kind) {
        lappend rows "\t{ $from, $to },"
    }
    # The closing line carries its own newline so that, together with the
    # one added by puts, a blank line separates consecutive tables.
    lappend rows "\};\n"
    puts [join $rows "\n"]
}