Skip to content

Commit

Permalink
ICU-8966 ICU-12850 add API/data/code for text layout properties InPC,…
Browse files Browse the repository at this point in the history
… InSC, vo (unicode-org#92)

ICU-8966: Indic_Positional_Category & Indic_Syllabic_Category

ICU-12850: Vertical_Orientation
  • Loading branch information
markusicu authored Sep 1, 2018
1 parent 49443f7 commit e59a297
Show file tree
Hide file tree
Showing 31 changed files with 3,759 additions and 994 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -996,4 +996,5 @@ tools/unicode/c/genuca/genuca.vcproj.*.*.user
tools/unicode/c/genuca/release
tools/unicode/c/genuca/x64
tools/unicode/c/genuca/x86
tools/unicode/c/icudefs.txt
tools/unicodetools/com/ibm/rbm/lib
2,006 changes: 1,059 additions & 947 deletions icu4c/source/common/propname_data.h

Large diffs are not rendered by default.

722 changes: 722 additions & 0 deletions icu4c/source/common/ulayout_props_data.h

Large diffs are not rendered by default.

179 changes: 178 additions & 1 deletion icu4c/source/common/unicode/uchar.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,12 +546,34 @@ typedef enum UProperty {
(http://www.unicode.org/reports/tr9/)
Returns UBidiPairedBracketType values. @stable ICU 52 */
UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
/**
* Enumerated property Indic_Positional_Category.
* New in Unicode 6.0 as provisional property Indic_Matra_Category;
* renamed and changed to informative in Unicode 8.0.
* See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt
* @stable ICU 63
*/
UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016,
/**
* Enumerated property Indic_Syllabic_Category.
* New in Unicode 6.0 as provisional; informative since Unicode 8.0.
* See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt
* @stable ICU 63
*/
UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017,
/**
* Enumerated property Vertical_Orientation.
* Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/).
* New as a UCD property in Unicode 10.0.
* @stable ICU 63
*/
UCHAR_VERTICAL_ORIENTATION=0x1018,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for enumerated/integer Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UCHAR_INT_LIMIT=0x1016,
UCHAR_INT_LIMIT=0x1019,
#endif // U_HIDE_DEPRECATED_API

/** Bitmask property General_Category_Mask.
Expand Down Expand Up @@ -2320,6 +2342,161 @@ typedef enum UHangulSyllableType {
#endif // U_HIDE_DEPRECATED_API
} UHangulSyllableType;

/**
* Indic Positional Category constants.
* Values for the enumerated property UCHAR_INDIC_POSITIONAL_CATEGORY.
*
* No constant has an explicit initializer, so the constants are numbered
* consecutively from 0 (U_INPC_NA) in declaration order.
*
* @see UCHAR_INDIC_POSITIONAL_CATEGORY
* @stable ICU 63
*/
typedef enum UIndicPositionalCategory {
/*
* Note: UIndicPositionalCategory constants are parsed by preparseucd.py.
* It matches lines like
* U_INPC_<Unicode Indic_Positional_Category value name>
* so each constant stays on its own line and is named after the
* Unicode property value name.
* NOTE(review): the numeric values presumably have to agree with the
* machine-generated property data -- confirm before inserting or
* reordering constants.
*/

/** @stable ICU 63 */
U_INPC_NA,
/** @stable ICU 63 */
U_INPC_BOTTOM,
/** @stable ICU 63 */
U_INPC_BOTTOM_AND_LEFT,
/** @stable ICU 63 */
U_INPC_BOTTOM_AND_RIGHT,
/** @stable ICU 63 */
U_INPC_LEFT,
/** @stable ICU 63 */
U_INPC_LEFT_AND_RIGHT,
/** @stable ICU 63 */
U_INPC_OVERSTRUCK,
/** @stable ICU 63 */
U_INPC_RIGHT,
/** @stable ICU 63 */
U_INPC_TOP,
/** @stable ICU 63 */
U_INPC_TOP_AND_BOTTOM,
/** @stable ICU 63 */
U_INPC_TOP_AND_BOTTOM_AND_RIGHT,
/** @stable ICU 63 */
U_INPC_TOP_AND_LEFT,
/** @stable ICU 63 */
U_INPC_TOP_AND_LEFT_AND_RIGHT,
/** @stable ICU 63 */
U_INPC_TOP_AND_RIGHT,
/** @stable ICU 63 */
U_INPC_VISUAL_ORDER_LEFT,
} UIndicPositionalCategory;

/**
* Indic Syllabic Category constants.
* Values for the enumerated property UCHAR_INDIC_SYLLABIC_CATEGORY.
*
* No constant has an explicit initializer, so the constants are numbered
* consecutively from 0 (U_INSC_OTHER) in declaration order.
*
* @see UCHAR_INDIC_SYLLABIC_CATEGORY
* @stable ICU 63
*/
typedef enum UIndicSyllabicCategory {
/*
* Note: UIndicSyllabicCategory constants are parsed by preparseucd.py.
* It matches lines like
* U_INSC_<Unicode Indic_Syllabic_Category value name>
* so each constant stays on its own line and is named after the
* Unicode property value name.
* NOTE(review): the numeric values presumably have to agree with the
* machine-generated property data -- confirm before inserting or
* reordering constants.
*/

/** @stable ICU 63 */
U_INSC_OTHER,
/** @stable ICU 63 */
U_INSC_AVAGRAHA,
/** @stable ICU 63 */
U_INSC_BINDU,
/** @stable ICU 63 */
U_INSC_BRAHMI_JOINING_NUMBER,
/** @stable ICU 63 */
U_INSC_CANTILLATION_MARK,
/** @stable ICU 63 */
U_INSC_CONSONANT,
/** @stable ICU 63 */
U_INSC_CONSONANT_DEAD,
/** @stable ICU 63 */
U_INSC_CONSONANT_FINAL,
/** @stable ICU 63 */
U_INSC_CONSONANT_HEAD_LETTER,
/** @stable ICU 63 */
U_INSC_CONSONANT_INITIAL_POSTFIXED,
/** @stable ICU 63 */
U_INSC_CONSONANT_KILLER,
/** @stable ICU 63 */
U_INSC_CONSONANT_MEDIAL,
/** @stable ICU 63 */
U_INSC_CONSONANT_PLACEHOLDER,
/** @stable ICU 63 */
U_INSC_CONSONANT_PRECEDING_REPHA,
/** @stable ICU 63 */
U_INSC_CONSONANT_PREFIXED,
/** @stable ICU 63 */
U_INSC_CONSONANT_SUBJOINED,
/** @stable ICU 63 */
U_INSC_CONSONANT_SUCCEEDING_REPHA,
/** @stable ICU 63 */
U_INSC_CONSONANT_WITH_STACKER,
/** @stable ICU 63 */
U_INSC_GEMINATION_MARK,
/** @stable ICU 63 */
U_INSC_INVISIBLE_STACKER,
/** @stable ICU 63 */
U_INSC_JOINER,
/** @stable ICU 63 */
U_INSC_MODIFYING_LETTER,
/** @stable ICU 63 */
U_INSC_NON_JOINER,
/** @stable ICU 63 */
U_INSC_NUKTA,
/** @stable ICU 63 */
U_INSC_NUMBER,
/** @stable ICU 63 */
U_INSC_NUMBER_JOINER,
/** @stable ICU 63 */
U_INSC_PURE_KILLER,
/** @stable ICU 63 */
U_INSC_REGISTER_SHIFTER,
/** @stable ICU 63 */
U_INSC_SYLLABLE_MODIFIER,
/** @stable ICU 63 */
U_INSC_TONE_LETTER,
/** @stable ICU 63 */
U_INSC_TONE_MARK,
/** @stable ICU 63 */
U_INSC_VIRAMA,
/** @stable ICU 63 */
U_INSC_VISARGA,
/** @stable ICU 63 */
U_INSC_VOWEL,
/** @stable ICU 63 */
U_INSC_VOWEL_DEPENDENT,
/** @stable ICU 63 */
U_INSC_VOWEL_INDEPENDENT,
} UIndicSyllabicCategory;

/**
* Vertical Orientation constants.
* Values for the enumerated property UCHAR_VERTICAL_ORIENTATION.
*
* No constant has an explicit initializer, so the constants are numbered
* consecutively from 0 (U_VO_ROTATED) in declaration order.
*
* @see UCHAR_VERTICAL_ORIENTATION
* @stable ICU 63
*/
typedef enum UVerticalOrientation {
/*
* Note: UVerticalOrientation constants are parsed by preparseucd.py.
* It matches lines like
* U_VO_<Unicode Vertical_Orientation value name>
* so each constant stays on its own line and is named after the
* Unicode property value name.
* NOTE(review): the numeric values presumably have to agree with the
* machine-generated property data -- confirm before inserting or
* reordering constants.
*/

/** @stable ICU 63 */
U_VO_ROTATED,
/** @stable ICU 63 */
U_VO_TRANSFORMED_ROTATED,
/** @stable ICU 63 */
U_VO_TRANSFORMED_UPRIGHT,
/** @stable ICU 63 */
U_VO_UPRIGHT,
} UVerticalOrientation;

/**
* Check a binary Unicode property for a code point.
*
Expand Down
5 changes: 5 additions & 0 deletions icu4c/source/common/uniset_props.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,11 @@ void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) {
case UPROPS_SRC_BIDI:
ubidi_addPropertyStarts(&sa, &status);
break;
case UPROPS_SRC_INPC:
case UPROPS_SRC_INSC:
case UPROPS_SRC_VO:
uprops_addPropertyStarts((UPropertySource)src, &sa, &status);
break;
default:
status = U_INTERNAL_PROGRAM_ERROR;
break;
Expand Down
48 changes: 48 additions & 0 deletions icu4c/source/common/uprops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/ucptrie.h"
#include "unicode/unorm2.h"
#include "unicode/uscript.h"
#include "unicode/ustring.h"
Expand All @@ -36,6 +37,10 @@
#include "ucase.h"
#include "ustr_imp.h"

// ulayout_props_data.h is machine-generated by genprops
#define INCLUDED_FROM_UPROPS_CPP
#include "ulayout_props_data.h"

U_NAMESPACE_USE

/* general properties API functions ----------------------------------------- */
Expand Down Expand Up @@ -428,6 +433,18 @@ static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UP
}
#endif

/**
 * IntProperty getter backed by inpc_trie.
 * The IntProperty and UProperty arguments are ignored; they exist only so the
 * function matches the getter signature used in the intProps table.
 *
 * @param c the code point to look up
 * @return the value stored for c in inpc_trie
 */
static int32_t getInPC(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    int32_t value = ucptrie_get(&inpc_trie, c);
    return value;
}

/**
 * IntProperty getter backed by insc_trie.
 * The IntProperty and UProperty arguments are ignored; they exist only so the
 * function matches the getter signature used in the intProps table.
 *
 * @param c the code point to look up
 * @return the value stored for c in insc_trie
 */
static int32_t getInSC(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    int32_t value = ucptrie_get(&insc_trie, c);
    return value;
}

/**
 * IntProperty getter backed by vo_trie.
 * The IntProperty and UProperty arguments are ignored; they exist only so the
 * function matches the getter signature used in the intProps table.
 *
 * @param c the code point to look up
 * @return the value stored for c in vo_trie
 */
static int32_t getVo(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    int32_t value = ucptrie_get(&vo_trie, c);
    return value;
}

static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
/*
* column, mask and shift values for int-value properties from u_getUnicodeProperties().
Expand Down Expand Up @@ -463,6 +480,9 @@ static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
{ 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue },
{ 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue },
{ UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue },
{ UPROPS_SRC_INPC, 0, maxInPCValue, getInPC, getMaxValueFromShift },
{ UPROPS_SRC_INSC, 0, maxInSCValue, getInSC, getMaxValueFromShift },
{ UPROPS_SRC_VO, 0, maxVoValue, getVo, getMaxValueFromShift },
};

U_CAPI int32_t U_EXPORT2
Expand Down Expand Up @@ -564,6 +584,34 @@ uprops_getSource(UProperty which) {
}
}

/**
 * Adds to a set the start code point of every same-value range in the trie
 * that belongs to one of the text layout property sources.
 *
 * @param src        selects the trie: UPROPS_SRC_INPC, UPROPS_SRC_INSC or UPROPS_SRC_VO
 * @param sa         set adder; sa->add(sa->set, start) is called once per range
 * @param pErrorCode in/out error code. Nothing is done if it already indicates
 *                   a failure on entry; it is set to U_ILLEGAL_ARGUMENT_ERROR
 *                   if src is not one of the three sources listed above.
 */
U_CFUNC void U_EXPORT2
uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return;
    }

    // Pick the trie that holds the data for the requested source.
    const UCPTrie *propTrie;
    if (src == UPROPS_SRC_INPC) {
        propTrie = &inpc_trie;
    } else if (src == UPROPS_SRC_INSC) {
        propTrie = &insc_trie;
    } else if (src == UPROPS_SRC_VO) {
        propTrie = &vo_trie;
    } else {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // Walk the trie range by range, beginning at code point 0.
    // The walk stops as soon as ucptrie_getRange() reports a negative end.
    UChar32 rangeStart = 0;
    for (;;) {
        UChar32 rangeEnd = ucptrie_getRange(propTrie, rangeStart, UCPTRIE_RANGE_NORMAL, 0,
                                            nullptr, nullptr, nullptr);
        if (rangeEnd < 0) {
            break;
        }
        sa->add(sa->set, rangeStart);
        // The next range begins right after the end of this one.
        rangeStart = rangeEnd + 1;
    }
}

#if !UCONFIG_NO_NORMALIZATION

U_CAPI int32_t U_EXPORT2
Expand Down
7 changes: 7 additions & 0 deletions icu4c/source/common/uprops.h
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,10 @@ enum UPropertySource {
UPROPS_SRC_NFKC_CF,
/** From normalizer2impl.cpp/nfc.nrm canonical iterator data */
UPROPS_SRC_NFC_CANON_ITER,
// Text layout properties.
UPROPS_SRC_INPC,
UPROPS_SRC_INSC,
UPROPS_SRC_VO,
/** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
UPROPS_SRC_COUNT
};
Expand Down Expand Up @@ -425,6 +429,9 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
U_CFUNC void U_EXPORT2
upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);

U_CFUNC void U_EXPORT2
uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode);

/**
* Return a set of characters for property enumeration.
* For each two consecutive characters (start, limit) in the set,
Expand Down
Binary file modified icu4c/source/data/in/pnames.icu
Binary file not shown.
Loading

0 comments on commit e59a297

Please sign in to comment.