2017-01-20 00:20:31 +00:00
// © 2016 and later: Unicode, Inc. and others.
2016-06-15 18:58:17 +00:00
// License & terms of use: http://www.unicode.org/copyright.html
2010-01-19 19:25:25 +00:00
/********************************************************************
2010-02-24 23:52:27 +00:00
* COPYRIGHT :
2016-05-31 21:45:07 +00:00
* Copyright ( c ) 1997 - 2011 , International Business Machines Corporation and
* others . All Rights Reserved .
* Copyright ( C ) 2010 , Yahoo ! Inc .
2010-01-19 19:25:25 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*
* File SELFMT . H
*
* Modification History :
*
* Date Name Description
* 11 / 11 / 09 kirtig Finished first cut of implementation .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# ifndef SELFMT
# define SELFMT
2011-04-25 20:47:32 +00:00
# include "unicode/messagepattern.h"
2010-01-19 19:25:25 +00:00
# include "unicode/numfmt.h"
2011-04-25 20:47:32 +00:00
# include "unicode/utypes.h"
2010-01-19 19:25:25 +00:00
/**
* \file
* \brief C++ API: SelectFormat object
*/
# if !UCONFIG_NO_FORMATTING
U_NAMESPACE_BEGIN
2011-04-25 20:47:32 +00:00
// Forward declaration only: SelectFormat names MessageFormat as a friend class,
// which does not require the full MessageFormat definition in this header.
class MessageFormat ;
2010-01-19 19:25:25 +00:00
/**
* < p > < code > SelectFormat < / code > supports the creation of internationalized
* messages by selecting phrases based on keywords . The pattern specifies
* how to map keywords to phrases and provides a default phrase . The
* object provided to the format method is a string that ' s matched
* against the keywords . If there is a match , the corresponding phrase
* is selected ; otherwise , the default phrase is used . < / p >
*
* < h4 > Using < code > SelectFormat < / code > for Gender Agreement < / h4 >
*
2011-04-25 20:47:32 +00:00
* < p > Note : Typically , select formatting is done via < code > MessageFormat < / code >
* with a < code > select < / code > argument type ,
* rather than using a stand - alone < code > SelectFormat < / code > . < / p >
*
2010-01-19 19:25:25 +00:00
* < p > The main use case for the select format is gender based inflection .
* When names or nouns are inserted into sentences , their gender can affect pronouns ,
* verb forms , articles , and adjectives . Special care needs to be
* taken for the case where the gender cannot be determined .
* The impact varies between languages : < / p >
2010-01-20 00:20:05 +00:00
* \ htmlonly
2010-01-19 19:25:25 +00:00
* < ul >
* < li > English has three genders , and unknown gender is handled as a special
* case . Names use the gender of the named person ( if known ) , nouns referring
* to people use natural gender , and inanimate objects are usually neutral .
* The gender only affects pronouns : " he " , " she " , " it " , " they " .
*
* < li > German differs from English in that the gender of nouns is rather
2010-01-20 00:20:05 +00:00
* arbitrary , even for nouns referring to people ( " Mädchen " , girl , is neutral ) .
2010-01-19 19:25:25 +00:00
* The gender affects pronouns ( " er " , " sie " , " es " ) , articles ( " der " , " die " ,
2010-01-20 00:20:05 +00:00
* " das " ) , and adjective forms ( " guter Mann " , " gute Frau " , " gutes Mädchen " ) .
2010-01-19 19:25:25 +00:00
*
* < li > French has only two genders ; as in German the gender of nouns
2010-01-20 20:04:28 +00:00
* is rather arbitrary - for sun and moon , the genders
2010-01-19 19:25:25 +00:00
* are the opposite of those in German . The gender affects
* pronouns ( " il " , " elle " ) , articles ( " le " , " la " ) ,
* adjective forms ( " bon " , " bonne " ) , and sometimes
2010-01-20 00:20:05 +00:00
* verb forms ( " allé " , " allée " ) .
2010-01-19 19:25:25 +00:00
*
* < li > Polish distinguishes five genders ( or noun classes ) ,
* human masculine , animate non - human masculine , inanimate masculine ,
* feminine , and neuter .
* < / ul >
2010-01-20 00:20:05 +00:00
* \ endhtmlonly
2010-01-19 19:25:25 +00:00
* < p > Some other languages have noun classes that are not related to gender ,
* but similar in grammatical use .
* Some African languages have around 20 noun classes . < / p >
*
2011-04-25 20:47:32 +00:00
* < p > < b > Note : < / b > For the gender of a < i > person < / i > in a given sentence ,
* we usually need to distinguish only between female , male and other / unknown . < / p >
*
2010-01-19 19:25:25 +00:00
* < p > To enable localizers to create sentence patterns that take their
* language ' s gender dependencies into consideration , software has to provide
* information about the gender associated with a noun or name to
* < code > MessageFormat < / code > .
* Two main cases can be distinguished : < / p >
*
* < ul >
* < li > For people , natural gender information should be maintained for each person .
2011-04-25 20:47:32 +00:00
* Keywords like " male " , " female " , " mixed " ( for groups of people )
* and " unknown " could be used .
2010-01-19 19:25:25 +00:00
*
* < li > For nouns , grammatical gender information should be maintained for
* each noun and per language , e . g . , in resource bundles .
* The keywords " masculine " , " feminine " , and " neuter " are commonly used ,
* but some languages may require other keywords .
* < / ul >
*
* < p > The resulting keyword is provided to < code > MessageFormat < / code > as a
* parameter separate from the name or noun it ' s associated with . For example ,
* to generate a message such as " Jean went to Paris " , three separate arguments
* would be provided : The name of the person as argument 0 , the gender of
* the person as argument 1 , and the name of the city as argument 2.
* The sentence pattern for English , where the gender of the person has
* no impact on this simple sentence , would not refer to argument 1 at all : < / p >
*
* < pre > { 0 } went to { 2 } . < / pre >
*
2011-04-25 20:47:32 +00:00
* < p > < b > Note : < / b > The entire sentence should be included ( and partially repeated )
* inside each phrase . Otherwise translators would have to be trained on how to
* move bits of the sentence in and out of the select argument of a message .
* ( The examples below do not follow this recommendation ! ) < / p >
*
2010-01-19 19:25:25 +00:00
* < p > The sentence pattern for French , where the gender of the person affects
* the form of the participle , uses a select format based on argument 1 : < / p >
*
2010-01-20 00:20:05 +00:00
* \ htmlonly < pre > { 0 } est { 1 , select , female { all & # x00E9 ; e } other { all & # x00E9 ; } } & # x00E0 ; { 2 } . < / pre > \ endhtmlonly
2010-01-19 19:25:25 +00:00
*
* < p > Patterns can be nested , so that it ' s possible to handle interactions of
* number and gender where necessary . For example , if the above sentence should
* allow for the names of several people to be inserted , the following sentence
2010-02-24 23:52:27 +00:00
* pattern can be used ( with argument 0 the list of people ' s names ,
* argument 1 the number of people , argument 2 their combined gender , and
2010-01-19 19:25:25 +00:00
* argument 3 the city name ) : < / p >
*
2010-01-20 00:20:05 +00:00
* \ htmlonly
2010-02-24 23:52:27 +00:00
* < pre > { 0 } { 1 , plural ,
2010-01-20 00:20:05 +00:00
* one { est { 2 , select , female { all & # x00E9 ; e } other { all & # x00E9 ; } } }
* other { sont { 2 , select , female { all & # x00E9 ; es } other { all & # x00E9 ; s } } }
* } & # x00E0 ; { 3 } . < / pre >
* \ endhtmlonly
2010-01-19 19:25:25 +00:00
*
* < h4 > Patterns and Their Interpretation < / h4 >
*
2011-04-25 20:47:32 +00:00
* < p > The < code > SelectFormat < / code > pattern string defines the phrase output
2010-01-19 19:25:25 +00:00
* for each user - defined keyword .
2011-04-25 20:47:32 +00:00
* The pattern is a sequence of ( keyword , message ) pairs .
* A keyword is a " pattern identifier " : [ ^ [ [ : Pattern_Syntax : ] [ : Pattern_White_Space : ] ] ] + < / p >
*
* < p > Each message is a MessageFormat pattern string enclosed in { curly braces } . < / p >
2010-01-19 19:25:25 +00:00
*
2011-04-25 20:47:32 +00:00
* < p > You always have to define a phrase for the default keyword
2010-02-24 23:52:27 +00:00
* < code > other < / code > ; this phrase is returned when the keyword
2010-01-19 19:25:25 +00:00
* provided to
* the < code > format < / code > method matches no other keyword .
* If a pattern does not provide a phrase for < code > other < / code > , the method
* it ' s provided to returns the error < code > U_DEFAULT_KEYWORD_MISSING < / code > .
2010-02-11 00:53:57 +00:00
* < br >
2011-04-25 20:47:32 +00:00
* Pattern_White_Space between keywords and messages is ignored .
* Pattern_White_Space within a message is preserved and output . < / p >
2010-01-19 19:25:25 +00:00
*
2011-04-25 20:47:32 +00:00
* < p > < pre > Example :
2010-01-20 00:20:05 +00:00
* \ htmlonly
2010-01-19 19:25:25 +00:00
*
* UErrorCode status = U_ZERO_ERROR ;
2010-01-20 00:20:05 +00:00
* MessageFormat * msgFmt = new MessageFormat ( UnicodeString ( " {0} est {1, select, female {allée} other {allé}} à Paris. " ) , Locale ( " fr " ) , status ) ;
2010-01-19 19:25:25 +00:00
* if ( U_FAILURE ( status ) ) {
* return ;
* }
* FieldPosition ignore ( FieldPosition : : DONT_CARE ) ;
* UnicodeString result ;
*
* const char * str1 = " Kirti,female " ;
* Formattable args1 [ ] = { " Kirti " , " female " } ;
* msgFmt - > format ( args1 , 2 , result , ignore , status ) ;
* cout < < " Input is " < < str1 < < " and result is: " < < result < < endl ;
* delete msgFmt ;
*
2010-01-20 00:20:05 +00:00
* \ endhtmlonly
2011-05-05 01:35:22 +00:00
* < / pre >
2010-01-28 01:23:47 +00:00
* < / p >
2010-01-20 00:20:05 +00:00
*
2010-02-11 00:53:57 +00:00
* Produces the output : < br >
2010-01-20 00:20:05 +00:00
* \ htmlonly
2010-01-28 01:23:47 +00:00
* < code > Kirti est all & # x00E9 ; e & # x00E0 ; Paris . < / code >
2010-01-20 00:20:05 +00:00
* \ endhtmlonly
2010-01-19 19:25:25 +00:00
*
2010-11-12 06:03:28 +00:00
* @ stable ICU 4.4
2010-01-19 19:25:25 +00:00
*/
class U_I18N_API SelectFormat : public Format {
public:

    /**
     * Creates a new <code>SelectFormat</code> for a given pattern string.
     * @param pattern  the pattern for this <code>SelectFormat</code>.
     *                 Errors are returned to status if the pattern is invalid.
     * @param status   output param set to success/failure code on exit, which
     *                 must not indicate a failure before the function call.
     * @stable ICU 4.4
     */
    SelectFormat(const UnicodeString& pattern, UErrorCode& status);

    /**
     * Copy constructor.
     * @stable ICU 4.4
     */
    SelectFormat(const SelectFormat& other);

    /**
     * Destructor.
     * @stable ICU 4.4
     */
    virtual ~SelectFormat();

    /**
     * Sets the pattern used by this select format.
     * Patterns and their interpretation are specified in the class description.
     *
     * @param pattern  the pattern for this select format.
     *                 Errors are returned to status if the pattern is invalid.
     * @param status   output param set to success/failure code on exit, which
     *                 must not indicate a failure before the function call.
     * @stable ICU 4.4
     */
    void applyPattern(const UnicodeString& pattern, UErrorCode& status);

    // Re-expose the format() overloads inherited from Format; without this
    // using-declaration the format() overloads declared in this class would
    // hide them.
    using Format::format;

    /**
     * Selects the phrase for the given keyword.
     *
     * @param keyword   The keyword that is used to select an alternative.
     * @param appendTo  output parameter to receive result.
     *                  Result is appended to existing contents.
     * @param pos       On input: an alignment field, if desired.
     *                  On output: the offsets of the alignment field.
     * @param status    output param set to success/failure code on exit, which
     *                  must not indicate a failure before the function call.
     * @return          Reference to 'appendTo' parameter.
     * @stable ICU 4.4
     */
    UnicodeString& format(const UnicodeString& keyword,
                          UnicodeString& appendTo,
                          FieldPosition& pos,
                          UErrorCode& status) const;

    /**
     * Assignment operator.
     *
     * @param other  the SelectFormat object to copy from.
     * @stable ICU 4.4
     */
    SelectFormat& operator=(const SelectFormat& other);

    /**
     * Return true if another object is semantically equal to this one.
     *
     * @param other  the SelectFormat object to be compared with.
     * @return       true if other is semantically equal to this.
     * @stable ICU 4.4
     */
    virtual UBool operator==(const Format& other) const;

    /**
     * Return true if another object is semantically unequal to this one.
     *
     * @param other  the SelectFormat object to be compared with.
     * @return       true if other is semantically unequal to this.
     * @stable ICU 4.4
     */
    virtual UBool operator!=(const Format& other) const;

    /**
     * Clones this Format object polymorphically. The caller owns the
     * result and should delete it when done.
     * @stable ICU 4.4
     */
    virtual Format* clone(void) const;

    /**
     * Format an object to produce a string.
     * This method handles keyword strings.
     * If the Formattable object is not a <code>UnicodeString</code>,
     * then it returns a failing UErrorCode.
     *
     * @param obj       A keyword string that is used to select an alternative.
     * @param appendTo  output parameter to receive result.
     *                  Result is appended to existing contents.
     * @param pos       On input: an alignment field, if desired.
     *                  On output: the offsets of the alignment field.
     * @param status    output param filled with success/failure status.
     * @return          Reference to 'appendTo' parameter.
     * @stable ICU 4.4
     */
    UnicodeString& format(const Formattable& obj,
                          UnicodeString& appendTo,
                          FieldPosition& pos,
                          UErrorCode& status) const;

    /**
     * Returns the pattern from applyPattern() or constructor.
     *
     * @param appendTo  output parameter to receive result.
     *                  Result is appended to existing contents.
     * @return          the UnicodeString with inserted pattern.
     * @stable ICU 4.4
     */
    UnicodeString& toPattern(UnicodeString& appendTo);

    /**
     * This method is not yet supported by <code>SelectFormat</code>.
     * <P>
     * Before calling, set parse_pos.index to the offset you want to start
     * parsing at in the source. After calling, parse_pos.index is the end of
     * the text you parsed. If error occurs, index is unchanged.
     * <P>
     * When parsing, leading whitespace is discarded (with a successful parse),
     * while trailing whitespace is left as is.
     * <P>
     * See Format::parseObject() for more.
     *
     * @param source     The string to be parsed into an object.
     * @param result     Formattable to be set to the parse result.
     *                   If parse fails, return contents are undefined.
     * @param parse_pos  The position to start parsing at. Upon return
     *                   this param is set to the position after the
     *                   last character successfully parsed. If the
     *                   source is not parsed successfully, this param
     *                   will remain unchanged.
     * @stable ICU 4.4
     */
    virtual void parseObject(const UnicodeString& source,
                             Formattable& result,
                             ParsePosition& parse_pos) const;

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     * @stable ICU 4.4
     */
    static UClassID U_EXPORT2 getStaticClassID(void);

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     * @stable ICU 4.4
     */
    virtual UClassID getDynamicClassID() const;

private:
    // MessageFormat is granted access to the private members below.
    friend class MessageFormat;

    SelectFormat();   // default constructor not implemented.

    /**
     * Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
     * @param pattern    A MessagePattern.
     * @param partIndex  the index of the first SelectFormat argument style part.
     * @param keyword    a keyword to be matched to one of the SelectFormat argument's keywords.
     * @param ec         Error code.
     * @return           the sub-message start part index.
     */
    static int32_t findSubMessage(const MessagePattern& pattern, int32_t partIndex,
                                  const UnicodeString& keyword, UErrorCode& ec);

    // NOTE(review): presumably the parsed form of the pattern supplied to the
    // constructor / applyPattern() -- confirm against the implementation file.
    MessagePattern msgPattern;
};
U_NAMESPACE_END
# endif /* #if !UCONFIG_NO_FORMATTING */
# endif // SELFMT
//eof