/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 * Copyright (C) 2010, Yahoo! Inc.
 ********************************************************************
 *
 * File SELFMT.H
 *
 * Modification History:
 *
 *   Date        Name        Description
 *   11/11/09    kirtig      Finished first cut of implementation.
 ********************************************************************/
# ifndef SELFMT
# define SELFMT
# include "unicode/utypes.h"
# include "unicode/numfmt.h"
/**
 * \file
 * \brief C++ API: SelectFormat object
 */
# if !UCONFIG_NO_FORMATTING
U_NAMESPACE_BEGIN
// Forward declaration: this header only refers to Hashtable through pointers
// (SelectFormat::parsedValuesHash and the copyHashtable() parameter), so the
// full class definition is not needed here.
class Hashtable ;
/**
* < p > < code > SelectFormat < / code > supports the creation of internationalized
* messages by selecting phrases based on keywords . The pattern specifies
* how to map keywords to phrases and provides a default phrase . The
* object provided to the format method is a string that ' s matched
* against the keywords . If there is a match , the corresponding phrase
* is selected ; otherwise , the default phrase is used . < / p >
*
* < h4 > Using < code > SelectFormat < / code > for Gender Agreement < / h4 >
*
* < p > The main use case for the select format is gender based inflection .
* When names or nouns are inserted into sentences , their gender can affect pronouns ,
* verb forms , articles , and adjectives . Special care needs to be
* taken for the case where the gender cannot be determined .
* The impact varies between languages : < / p >
2010-01-20 00:20:05 +00:00
* \ htmlonly
2010-01-19 19:25:25 +00:00
* < ul >
* < li > English has three genders , and unknown gender is handled as a special
* case . Names use the gender of the named person ( if known ) , nouns referring
* to people use natural gender , and inanimate objects are usually neutral .
* The gender only affects pronouns : " he " , " she " , " it " , " they " .
*
* < li > German differs from English in that the gender of nouns is rather
2010-01-20 00:20:05 +00:00
* arbitrary , even for nouns referring to people ( " Mädchen " , girl , is neutral ) .
2010-01-19 19:25:25 +00:00
* The gender affects pronouns ( " er " , " sie " , " es " ) , articles ( " der " , " die " ,
2010-01-20 00:20:05 +00:00
* " das " ) , and adjective forms ( " guter Mann " , " gute Frau " , " gutes Mädchen " ) .
2010-01-19 19:25:25 +00:00
*
* < li > French has only two genders ; as in German the gender of nouns
2010-01-20 20:04:28 +00:00
* is rather arbitrary - for sun and moon , the genders
2010-01-19 19:25:25 +00:00
* are the opposite of those in German . The gender affects
* pronouns ( " il " , " elle " ) , articles ( " le " , " la " ) ,
* adjective forms ( " bon " , " bonne " ) , and sometimes
2010-01-20 00:20:05 +00:00
* verb forms ( " allé " , " allée " ) .
2010-01-19 19:25:25 +00:00
*
* < li > Polish distinguishes five genders ( or noun classes ) ,
* human masculine , animate non - human masculine , inanimate masculine ,
* feminine , and neuter .
* < / ul >
2010-01-20 00:20:05 +00:00
* \ endhtmlonly
2010-01-19 19:25:25 +00:00
* < p > Some other languages have noun classes that are not related to gender ,
* but similar in grammatical use .
* Some African languages have around 20 noun classes . < / p >
*
* < p > To enable localizers to create sentence patterns that take their
* language ' s gender dependencies into consideration , software has to provide
* information about the gender associated with a noun or name to
* < code > MessageFormat < / code > .
* Two main cases can be distinguished : < / p >
*
* < ul >
* < li > For people , natural gender information should be maintained for each person .
* The keywords " male " , " female " , " mixed " ( for groups of people )
* and " unknown " are used .
*
* < li > For nouns , grammatical gender information should be maintained for
* each noun and per language , e . g . , in resource bundles .
* The keywords " masculine " , " feminine " , and " neuter " are commonly used ,
* but some languages may require other keywords .
* < / ul >
*
* < p > The resulting keyword is provided to < code > MessageFormat < / code > as a
* parameter separate from the name or noun it ' s associated with . For example ,
* to generate a message such as " Jean went to Paris " , three separate arguments
* would be provided : The name of the person as argument 0 , the gender of
* the person as argument 1 , and the name of the city as argument 2.
* The sentence pattern for English , where the gender of the person has
* no impact on this simple sentence , would not refer to argument 1 at all : < / p >
*
* < pre > { 0 } went to { 2 } . < / pre >
*
* < p > The sentence pattern for French , where the gender of the person affects
* the form of the participle , uses a select format based on argument 1 : < / p >
*
2010-01-20 00:20:05 +00:00
* \ htmlonly < pre > { 0 } est { 1 , select , female { all & # x00E9 ; e } other { all & # x00E9 ; } } & # x00E0 ; { 2 } . < / pre > \ endhtmlonly
2010-01-19 19:25:25 +00:00
*
* < p > Patterns can be nested , so that it ' s possible to handle interactions of
* number and gender where necessary . For example , if the above sentence should
* allow for the names of several people to be inserted , the following sentence
2010-02-24 23:52:27 +00:00
* pattern can be used ( with argument 0 the list of people ' s names ,
* argument 1 the number of people , argument 2 their combined gender , and
2010-01-19 19:25:25 +00:00
* argument 3 the city name ) : < / p >
*
2010-01-20 00:20:05 +00:00
* \ htmlonly
2010-02-24 23:52:27 +00:00
* < pre > { 0 } { 1 , plural ,
2010-01-20 00:20:05 +00:00
* one { est { 2 , select , female { all & # x00E9 ; e } other { all & # x00E9 ; } } }
* other { sont { 2 , select , female { all & # x00E9 ; es } other { all & # x00E9 ; s } } }
* } & # x00E0 ; { 3 } . < / pre >
* \ endhtmlonly
2010-01-19 19:25:25 +00:00
*
* < h4 > Patterns and Their Interpretation < / h4 >
*
* < p > The < code > SelectFormat < / code > pattern text defines the phrase output
* for each user - defined keyword .
* The pattern is a sequence of < code > < i > keyword < / i > { < i > phrase < / i > } < / code >
2010-02-24 23:52:27 +00:00
* clauses .
2010-01-19 19:25:25 +00:00
* Each clause assigns the phrase < code > < i > phrase < / i > < / code >
* to the user - defined < code > < i > keyword < / i > < / code > . < / p >
*
* <p>Keywords must match the pattern [a-zA-Z][a-zA-Z0-9_-]*; keywords
* that don ' t match this pattern result in the error code
* < code > U_ILLEGAL_CHARACTER < / code > .
* You always have to define a phrase for the default keyword
2010-02-24 23:52:27 +00:00
* < code > other < / code > ; this phrase is returned when the keyword
2010-01-19 19:25:25 +00:00
* provided to
* the < code > format < / code > method matches no other keyword .
* If a pattern does not provide a phrase for < code > other < / code > , the method
* it ' s provided to returns the error < code > U_DEFAULT_KEYWORD_MISSING < / code > .
* If a pattern provides more than one phrase for the same keyword , the
* error < code > U_DUPLICATE_KEYWORD < / code > is returned .
* <br>
* Spaces between <code><i>keyword</i></code> and
* <code>{<i>phrase</i>}</code> will be ignored; spaces within
* <code>{<i>phrase</i>}</code> will be preserved.</p>
*
* < p > The phrase for a particular select case may contain other message
* format patterns . < code > SelectFormat < / code > preserves these so that you
* can use the strings produced by < code > SelectFormat < / code > with other
* formatters . If you are using < code > SelectFormat < / code > inside a
* < code > MessageFormat < / code > pattern , < code > MessageFormat < / code > will
* automatically evaluate the resulting format pattern .
* Thus , curly braces ( < code > { < / code > , < code > } < / code > ) are < i > only < / i > allowed
* in phrases to define a nested format pattern . < / p >
*
* < p > Example :
2010-01-20 00:20:05 +00:00
* \ htmlonly
2010-01-19 19:25:25 +00:00
*
* UErrorCode status = U_ZERO_ERROR ;
2010-01-20 00:20:05 +00:00
* MessageFormat * msgFmt = new MessageFormat ( UnicodeString ( " {0} est {1, select, female {allée} other {allé}} à Paris. " ) , Locale ( " fr " ) , status ) ;
2010-01-19 19:25:25 +00:00
* if ( U_FAILURE ( status ) ) {
* return ;
* }
* FieldPosition ignore ( FieldPosition : : DONT_CARE ) ;
* UnicodeString result ;
*
* char * str1 = " Kirti,female " ;
* Formattable args1 [ ] = { " Kirti " , " female " } ;
* msgFmt - > format ( args1 , 2 , result , ignore , status ) ;
* cout < < " Input is " < < str1 < < " and result is: " < < result < < endl ;
* delete msgFmt ;
*
2010-01-20 00:20:05 +00:00
* \ endhtmlonly
2010-01-28 01:23:47 +00:00
* < / p >
2010-01-20 00:20:05 +00:00
*
2010-02-11 00:53:57 +00:00
* Produces the output : < br >
2010-01-20 00:20:05 +00:00
* \ htmlonly
2010-01-28 01:23:47 +00:00
* < code > Kirti est all & # x00E9 ; e & # x00E0 ; Paris . < / code >
2010-01-20 00:20:05 +00:00
* \ endhtmlonly
2010-01-19 19:25:25 +00:00
*
* @ draft ICU 4.4
*/
class U_I18N_API SelectFormat : public Format {
public:

    /**
     * Creates a new <code>SelectFormat</code> for a given pattern string.
     * @param  pattern   the pattern for this <code>SelectFormat</code>.
     *                   Errors are returned to status if the pattern is invalid.
     * @param status     output param set to success/failure code on exit, which
     *                   must not indicate a failure before the function call.
     * @draft ICU 4.4
     */
    SelectFormat(const UnicodeString& pattern, UErrorCode& status);

    /**
     * Copy constructor.
     * @param other the SelectFormat object to copy from.
     * @draft ICU 4.4
     */
    SelectFormat(const SelectFormat& other);

    /**
     * Destructor.
     * @draft ICU 4.4
     */
    virtual ~SelectFormat();

    /**
     * Sets the pattern used by this select format.
     * Patterns and their interpretation are specified in the class description.
     *
     * @param pattern    the pattern for this select format.
     *                   Errors are returned to status if the pattern is invalid.
     * @param status     output param set to success/failure code on exit, which
     *                   must not indicate a failure before the function call.
     * @draft ICU 4.4
     */
    void applyPattern(const UnicodeString& pattern, UErrorCode& status);

    // Keep the format() overloads inherited from Format visible; the
    // overloads declared below would otherwise hide them.
    using Format::format;

    /**
     * Selects the phrase for the given keyword.
     *
     * @param keyword   The keyword that is used to select an alternative.
     * @param appendTo  output parameter to receive result.
     *                  The result is appended to existing contents.
     * @param pos       On input: an alignment field, if desired.
     *                  On output: the offsets of the alignment field.
     * @param status    output param set to success/failure code on exit, which
     *                  must not indicate a failure before the function call.
     * @return          Reference to 'appendTo' parameter.
     * @draft ICU 4.4
     */
    UnicodeString& format(const UnicodeString& keyword,
                          UnicodeString& appendTo,
                          FieldPosition& pos,
                          UErrorCode& status) const;

    /**
     * Assignment operator.
     *
     * @param other    the SelectFormat object to copy from.
     * @return         NOTE(review): presumably a reference to this object,
     *                 as is conventional for operator= - confirm in selfmt.cpp.
     * @draft ICU 4.4
     */
    SelectFormat& operator=(const SelectFormat& other);

    /**
     * Return true if another object is semantically equal to this one.
     *
     * @param other    the Format object to be compared with.
     * @return         true if other is semantically equal to this.
     * @draft ICU 4.4
     */
    virtual UBool operator==(const Format& other) const;

    /**
     * Return true if another object is semantically unequal to this one.
     *
     * @param other    the Format object to be compared with.
     * @return         true if other is semantically unequal to this.
     * @draft ICU 4.4
     */
    virtual UBool operator!=(const Format& other) const;

    /**
     * Clones this Format object polymorphically. The caller owns the
     * result and should delete it when done.
     * @return a newly allocated copy of this object, owned by the caller.
     * @draft ICU 4.4
     */
    virtual Format* clone(void) const;

    /**
     * Format an object to produce a string.
     * This method handles keyword strings.
     * If the Formattable object is not a <code>UnicodeString</code>,
     * then it returns a failing UErrorCode.
     *
     * @param obj       A keyword string that is used to select an alternative.
     * @param appendTo  output parameter to receive result.
     *                  The result is appended to existing contents.
     * @param pos       On input: an alignment field, if desired.
     *                  On output: the offsets of the alignment field.
     * @param status    output param filled with success/failure status.
     * @return          Reference to 'appendTo' parameter.
     * @draft ICU 4.4
     */
    UnicodeString& format(const Formattable& obj,
                          UnicodeString& appendTo,
                          FieldPosition& pos,
                          UErrorCode& status) const;

    /**
     * Returns the pattern from applyPattern() or constructor.
     *
     * NOTE(review): this accessor is not declared const; it is left that way
     * because adding const would change the published signature.
     *
     * @param  appendTo  output parameter to receive result.
     *                   The result is appended to existing contents.
     * @return the UnicodeString with inserted pattern.
     * @draft ICU 4.4
     */
    UnicodeString& toPattern(UnicodeString& appendTo);

    /**
     * This method is not yet supported by <code>SelectFormat</code>.
     * The remaining text describes the general contract of
     * Format::parseObject().
     * <P>
     * Before calling, set parse_pos.index to the offset you want to start
     * parsing at in the source. After calling, parse_pos.index is the end of
     * the text you parsed. If error occurs, index is unchanged.
     * <P>
     * When parsing, leading whitespace is discarded (with a successful parse),
     * while trailing whitespace is left as is.
     * <P>
     * See Format::parseObject() for more.
     *
     * @param source    The string to be parsed into an object.
     * @param result    Formattable to be set to the parse result.
     *                  If parse fails, return contents are undefined.
     * @param parse_pos The position to start parsing at. Upon return
     *                  this param is set to the position after the
     *                  last character successfully parsed. If the
     *                  source is not parsed successfully, this param
     *                  will remain unchanged.
     * @draft ICU 4.4
     */
    virtual void parseObject(const UnicodeString& source,
                             Formattable& result,
                             ParsePosition& parse_pos) const;

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     * @return the UClassID for this class.
     * @draft ICU 4.4
     */
    static UClassID U_EXPORT2 getStaticClassID(void);

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     * @return the UClassID for the actual class of this object.
     * @draft ICU 4.4
     */
    virtual UClassID getDynamicClassID() const;

private:
    // Character classes returned by classifyCharacter() for use by
    // applyPattern().
    typedef enum classesForSelectFormat {
        tStartKeyword,
        tContinueKeyword,
        tLeftBrace,
        tRightBrace,
        tSpace,
        tOther
    } CharacterClass;

    // NOTE(review): presumably the pattern text that toPattern() returns -
    // confirm in selfmt.cpp.
    UnicodeString pattern;

    // Hash to store the keyword, phrase pairs.
    Hashtable* parsedValuesHash;

    SelectFormat();   // default constructor not implemented.

    // NOTE(review): presumably shared initialization of the members above;
    // the definition is not visible in this header - confirm in selfmt.cpp.
    void init(UErrorCode& status);

    // Used by applyPattern: classifies a character into one of the
    // CharacterClass values.
    CharacterClass classifyCharacter(UChar ch) const;

    // Checks if the "other" keyword is present in pattern.
    UBool checkSufficientDefinition();

    // Checks if the keyword passed is valid.
    UBool checkValidKeyword(const UnicodeString& argKeyword) const;

    // NOTE(review): presumably invoked when pattern parsing fails; exact
    // effect is not visible in this header - confirm in selfmt.cpp.
    void parsingFailure();

    // NOTE(review): presumably copies the entries of 'other' into
    // parsedValuesHash, reporting errors through 'status' - confirm in
    // selfmt.cpp.
    void copyHashtable(Hashtable* other, UErrorCode& status);
};
U_NAMESPACE_END
# endif /* #if !UCONFIG_NO_FORMATTING */
# endif // SELFMT
//eof