forked from ruby/ruby
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* enc/unicode/case-folding.rb: script to convert CaseFolding.txt, translated from CaseFolding.py. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@46266 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
- Loading branch information
Showing
2 changed files
with
112 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
#!/usr/bin/ruby | ||
|
||
# Usage: | ||
# $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt | ||
# $ ruby CaseFolding.rb CaseFolding.txt > ../enc/unicode/casefold.h | ||
|
||
|
||
# Formats a list of code points as a comma-separated C initializer fragment.
#
# v - Array of Integers.
#
# Returns a String such as "0x0061, 0x0300": each value is written in
# lower-case hexadecimal, zero-padded to at least four digits. An empty
# array yields an empty string.
def hex_seq(v)
  v.map { |i| "0x%04x" % i }.join(", ")
end
|
||
# Prints one C array definition to standard output.
#
# table - text placed between "static const " and "[]" in the declaration,
#         e.g. "CaseFold_11_Type CaseFold".
# data  - Hash (anything responding to #sort that yields [key, value] pairs).
#         Each key is either an Integer code point or an Array of code
#         points; each value is an Array of code points.
#
# Entries are written in sorted key order, one initializer per line. A key
# that is an Array with more than one element is written as a braced list;
# any other key is written as a single hex literal.
def print_table(table, data)
  print("static const #{table}[] = {\n")
  data.sort.each do |k, v|
    sk = if Array === k && k.length > 1
           "{#{hex_seq(k)}}"
         else
           # Works for an Integer and for a one-element Array alike, because
           # String#% accepts an Array as its argument list.
           "0x%04x" % k
         end
    print(" {#{sk}, {#{v.length}, {#{hex_seq(v)}}}},\n")
  end
  print("};\n\n")
end
|
||
# Reads a CaseFolding.txt-style data file and prints the case folding and
# unfolding C tables, followed by the table-size macros, to standard output.
#
# filename - path of the data file to read.
#
# Only lines matching "<code>; <C|F|T>; <1 to 3 codes>;" are used; every
# other line is ignored. C and F lines populate the fold table (source code
# point => target sequence) and the unfold tables (target sequence => list of
# source code points, grouped by target length 1..3). A T line only marks its
# source code point as locale dependent: the fold/unfold entries recorded for
# that code point are moved into the *_Locale tables.
def print_case_folding_data(filename)
  pattern = /([0-9A-F]{4,6}); ([CFT]); ([0-9A-F]{4,6})(?: ([0-9A-F]{4,6}))?(?: ([0-9A-F]{4,6}))?;/

  fold = {}
  unfold = [{}, {}, {}]
  turkic = []

  # File.foreach rather than IO.foreach: a file name beginning with "|" is
  # then always treated as a path, never as a command to execute.
  File.foreach(filename) do |line|
    res = pattern.match(line)
    next unless res

    ch_from = res[1].to_i(16)

    if res[2] == 'T'
      # Turkic case folding
      turkic << ch_from
      next
    end

    # store folding data (captures 3..5 hold the one to three target codes)
    ch_to = res.captures.drop(2).compact.map { |hex| hex.to_i(16) }
    fold[ch_from] = ch_to

    # store unfolding data, bucketed by the length of the target sequence
    (unfold[ch_to.length - 1][ch_to] ||= []) << ch_from
  end

  # move locale dependent data to (un)fold_locale
  fold_locale = {}
  unfold_locale = [{}, {}]
  turkic.each do |ch_from|
    key = fold[ch_from]
    # No C/F mapping for this code point (or it was already moved by an
    # earlier T line): there is nothing to move.
    next unless key

    i = key.length - 1
    moved = unfold[i].delete(key)
    # Single-code-point targets are keyed by the bare code point. Skip the
    # store when the unfold entry is already gone so no nil value ends up in
    # the table.
    unfold_locale[i][i == 0 ? key[0] : key] = moved if moved
    fold_locale[ch_from] = fold.delete(ch_from)
  end

  # print the header
  print("/* DO NOT EDIT THIS FILE. */\n")
  print("/* Generated by enc/unicode/case-folding.rb */\n\n")

  # print folding data

  # CaseFold
  print_table("CaseFold_11_Type CaseFold", fold)

  # CaseFold_Locale
  print_table("CaseFold_11_Type CaseFold_Locale", fold_locale)

  # print unfolding data

  # CaseUnfold_11
  print_table("CaseUnfold_11_Type CaseUnfold_11", unfold[0])

  # CaseUnfold_11_Locale
  print_table("CaseUnfold_11_Type CaseUnfold_11_Locale", unfold_locale[0])

  # CaseUnfold_12
  print_table("CaseUnfold_12_Type CaseUnfold_12", unfold[1])

  # CaseUnfold_12_Locale
  print_table("CaseUnfold_12_Type CaseUnfold_12_Locale", unfold_locale[1])

  # CaseUnfold_13
  print_table("CaseUnfold_13_Type CaseUnfold_13", unfold[2])

  # table sizes: entry counts scaled by a per-table factor; "%d" drops the
  # fractional part
  fold_table_size = fold.size + fold_locale.size
  printf("#define FOLD_TABLE_SIZE\t\t%d\n", (fold_table_size * 1.2))
  unfold1_table_size = unfold[0].size + unfold_locale[0].size
  printf("#define UNFOLD1_TABLE_SIZE\t%d\n", (unfold1_table_size * 1.2))
  unfold2_table_size = unfold[1].size + unfold_locale[1].size
  printf("#define UNFOLD2_TABLE_SIZE\t%d\n", (unfold2_table_size * 1.5))
  unfold3_table_size = unfold[2].size
  printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7))
end
|
||
# Script entry point: the data file is the first command-line argument,
# falling back to CaseFolding.txt in the current directory.
filename = ARGV[0] || 'CaseFolding.txt'
print_case_folding_data(filename)