/* scuffed-code/icu4c/source/i18n/ucoltok.c */

#include "unicode/ustring.h"

#include "cmemory.h"
#include "ucoltok.h"
#include "uhash.h"
#include "ucmp32.h"

static UHashtable *uchars2tokens;
static UColTokListHeader ListList[256];
static uint32_t listPosition = 0;

static const UChar *rulesToParse = 0;

/* will use a small structure, tokHash */

/*
 * Hash callback for the token table.
 *
 * k points at a UColToken whose source and expansion fields each pack a
 * string: the top 8 bits hold the length and the low 24 bits hold the
 * offset of the first character relative to rulesToParse.  The hash covers
 * the source characters followed by the expansion characters (if any).
 * Long strings are sampled with a stride derived from the source length;
 * the same stride is applied to the expansion.
 *
 * Returns 0 for a NULL key.
 */
U_CAPI int32_t
uhash_hashTokens(const void *k) {
  /* Accumulate unsigned: "hash * 37" wraps for all but the shortest
     strings, and signed overflow would be undefined behaviour. */
  uint32_t hash = 0;
  if (k != NULL) {
      const UColToken *key = (const UColToken *)k;
      int32_t len = (int32_t)((key->source & 0xFF000000)>>24);
      int32_t inc = ((len - 32) / 32) + 1;
      const UChar *p = (key->source & 0x00FFFFFF) + rulesToParse;
      int32_t i;

      /* A zero-length source yields inc == 0; never let the stride drop
         below one or the expansion loop below could not terminate. */
      if (inc < 1) {
          inc = 1;
      }

      /* Index instead of bumping the pointer so it never moves past the
         end of the string. */
      for (i = 0; i < len; i += inc) {
          hash = (hash * 37) + p[i];
      }

      len = (int32_t)((key->expansion & 0xFF000000)>>24);
      if (len != 0) {
        p = (key->expansion & 0x00FFFFFF) + rulesToParse;
        for (i = 0; i < len; i += inc) {
            hash = (hash * 37) + p[i];
        }
      }
  }
  return (int32_t)hash;
}

/*
 * Key comparator for the token table.
 *
 * key1 and key2 point at UColToken structures.  Two tokens are equal when
 * their source strings have the same length and characters and the same
 * holds for their expansion strings.  Each string is packed as
 * (length << 24) | offset, with the offset relative to rulesToParse.
 *
 * Returns TRUE when the tokens are equal (including when both pointers are
 * identical) and FALSE otherwise, including when exactly one is NULL.
 */
U_CAPI UBool
uhash_compareTokens(const void *key1, const void *key2) {
    const UColToken *p1 = (const UColToken*) key1;
    const UColToken *p2 = (const UColToken*) key2;
    const UChar *s1;
    const UChar *s2;
    uint32_t s1L, s2L, i;

    /* The pointer checks must come before any field is read. */
    if (p1 == p2) {
        return TRUE;
    }
    if (p1 == NULL || p2 == NULL) {
        return FALSE;
    }
    /* Identical packed values denote the very same text ranges. */
    if(p1->source == p2->source && p1->expansion == p2->expansion) {
      return TRUE;
    }

    /* Compare the source strings. */
    s1L = ((p1->source & 0xFF000000) >> 24);
    s2L = ((p2->source & 0xFF000000) >> 24);
    if(s1L != s2L) {
      return FALSE;
    }
    s1 = (p1->source & 0x00FFFFFF) + rulesToParse;
    s2 = (p2->source & 0x00FFFFFF) + rulesToParse;
    /* Bounded by the length, not by a limit derived from the moving
       pointer, so the scan stops at the end of the token. */
    for(i = 0; i < s1L; i++) {
      if(s1[i] != s2[i]) {
        return FALSE;
      }
    }

    /* Compare the expansion strings; two empty expansions are equal. */
    s1L = ((p1->expansion & 0xFF000000) >> 24);
    s2L = ((p2->expansion & 0xFF000000) >> 24);
    if(s1L != s2L) {
      return FALSE;
    }
    s1 = (p1->expansion & 0x00FFFFFF) + rulesToParse;
    s2 = (p2->expansion & 0x00FFFFFF) + rulesToParse;
    for(i = 0; i < s1L; i++) {
      if(s1[i] != s2[i]) {
        return FALSE;
      }
    }
    return TRUE;
}

/*
 * Value deleter registered on the token hash table.
 *
 * Deliberately does nothing: the element is neither inspected nor
 * released.  An earlier draft, since disabled, treated the element as a
 * UCAElements record and freed its primary/secondary/tertiary arrays
 * followed by the record itself.
 */
void deleteElement(void *element) {
    (void)element; /* intentionally unused */
}

/*
 * Prepares the file-level parsing state for a new rule string.
 *
 * Records src->source in rulesToParse (the base that the hash and compare
 * callbacks add token offsets to) and opens the token hash table in
 * uchars2tokens, registering deleteElement as its value deleter.
 *
 * Does nothing if *status already indicates a failure on entry.  If opening
 * the table fails, the failure is left in *status and no deleter is set.
 *
 * NOTE(review): any table already stored in uchars2tokens is overwritten
 * without being closed.
 */
void ucol_tok_initTokenList(UColTokenParser *src, UErrorCode *status) {
  if(U_FAILURE(*status)) {
    return;
  }
  rulesToParse = src->source;
  uchars2tokens = uhash_open(uhash_hashTokens, uhash_compareTokens, status);
  /* Do not touch the table when it could not be created. */
  if(U_FAILURE(*status) || uchars2tokens == NULL) {
    return;
  }
  uhash_setValueDeleter(uchars2tokens, deleteElement);
}

/*
 * Placeholder: creates nothing and always hands back NULL.
 */
UColToken *ucol_tok_open() {
  UColToken *token = NULL;
  return token;
}

/* Strength value meaning "no relation character seen yet" for the entry
   being parsed; also the initial "strongest" value of a new list header. */
#define UCOL_TOK_UNSET 0xFFFFFFFF
/* Strength value assigned when '&' is parsed; tokens carrying it are the
   reset tokens that head a list. */
#define UCOL_TOK_RESET 0xDEADBEEF

/*
Processing Description
  1 Build a ListList. Each list has a header, which contains two lists (positive 
  and negative), a reset token, a baseCE, nextCE, and previousCE. The lists and 
  reset may be null. 
  2 As you process, you keep a LAST pointer that points to the last token you 
  handled. 
*/

/*
 * Parses the collation rules between src->current and src->end and files
 * every token into the list that belongs to the reset ('&') it follows.
 *
 * An entry consists of a relation character ('&', '<', ';', ',' or '='),
 * its source characters and, after a '/', optional expansion characters.
 * Characters between single quotes are taken literally; unquoted
 * whitespace is skipped.  Source and expansion are stored packed as
 * (length << 24) | offset, with the offset relative to src->source.
 *
 * On success src->lh is set to the list headers and src->resultLen to
 * their count, which is also the return value.
 *
 * Returns 0 with *status set when:
 *   - *status is already a failure, or the token table cannot be opened;
 *   - the rules are malformed (U_INVALID_FORMAT_ERROR);
 *   - a token cannot be allocated (U_MEMORY_ALLOCATION_ERROR);
 *   - no list header is left (U_INDEX_OUTOFBOUNDS_ERROR).
 * Returns 0 with *status untouched when only whitespace remains to be
 * parsed; src->lh and src->resultLen are not written in that case.
 *
 * NOTE(review): listPosition is a file-level counter that this function
 * never resets, so headers from an earlier call stay counted.
 */
uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, UErrorCode *status) {
  UColToken *lastToken = NULL;
  uint32_t newCharsLen = 0, newExtensionsLen = 0;
  uint32_t charsOffset = 0, extensionOffset = 0;
  uint32_t expandNext = 0;

  uint32_t newStrength = UCOL_TOK_UNSET; 

  ucol_tok_initTokenList(src, status);
  /* Without a usable token table nothing below can work. */
  if(U_FAILURE(*status)) {
    return 0;
  }

  while(src->current < src->end) {
    { /* parsing part */

      UBool inChars = TRUE;
      UBool inQuote = FALSE;

      newStrength = UCOL_TOK_UNSET; 
      newCharsLen = 0; newExtensionsLen = 0;
      charsOffset = 0; extensionOffset = 0;

      while (src->current < src->end) {
          UChar ch = *(src->current);

        if (inQuote) {
          if (ch == 0x0027/*'\''*/) {
              inQuote = FALSE;
          } else {
            if ((newCharsLen == 0) || inChars) {
              if(newCharsLen == 0) {
                charsOffset = src->current - src->source;
              }
              newCharsLen++;
            } else {
              if(newExtensionsLen == 0) {
                extensionOffset = src->current - src->source;
              }
              newExtensionsLen++;
            }
          }
        } else {
          /* Sets the strength for this entry */
          switch (ch) {
            /* A second relation character ends the current entry; it is
               left unconsumed for the next pass of the outer loop. */
            case 0x003D/*'='*/ : 
              if (newStrength != UCOL_TOK_UNSET) {
                goto EndOfLoop;
              }

              newStrength = UCOL_IDENTICAL;
              break;

            case 0x002C/*','*/:  
              if (newStrength != UCOL_TOK_UNSET) {
                goto EndOfLoop;
              }

              newStrength = UCOL_TERTIARY;
              break;

            case  0x003B/*';'*/:
              if (newStrength != UCOL_TOK_UNSET) {
                goto EndOfLoop;
              }

              newStrength = UCOL_SECONDARY;
              break;

            case 0x003C/*'<'*/:  
              if (newStrength != UCOL_TOK_UNSET) {
                goto EndOfLoop;
              }

              newStrength = UCOL_PRIMARY;
              break;

            case 0x0026/*'&'*/:  
              if (newStrength != UCOL_TOK_UNSET) {
                goto EndOfLoop;
              }

              newStrength = UCOL_TOK_RESET; /* PatternEntry::RESET = 0 */
              break;

            /* Ignore the white spaces */
            case 0x0009/*'\t'*/:
            case 0x000C/*'\f'*/:
            case 0x000D/*'\r'*/:
            case 0x000A/*'\n'*/:
            case 0x0020/*' '*/:  
              break; /* skip whitespace TODO use Unicode */

            case 0x002F/*'/'*/:
                    /* This entry has an extension. */
              inChars = FALSE;
              break;

            case 0x0027/*'\''*/:
              /* The character after the opening quote is consumed here, so
                 it has to exist: a quote that ends the rules is an error,
                 not a reason to read past src->end. */
              if (src->current + 1 >= src->end) {
                *status = U_INVALID_FORMAT_ERROR;
                return 0;
              }
              inQuote = TRUE;
              ++(src->current); /*pattern[++index]; */

              if ((newCharsLen == 0) || inChars) {
                if(newCharsLen == 0) {
                  charsOffset = src->current - src->source;
                }
                newCharsLen++;
              } else {
                /* Record where the expansion starts, as the other
                   branches do. */
                if(newExtensionsLen == 0) {
                  extensionOffset = src->current - src->source;
                }
                newExtensionsLen++;
              }

              break;

            default:
              /* Text before any relation character is malformed. */
              if (newStrength == UCOL_TOK_UNSET) {
                *status = U_INVALID_FORMAT_ERROR;
                return 0;
              }

              if (ucol_tok_isSpecialChar(ch) && (inQuote == FALSE)) {
                *status = U_INVALID_FORMAT_ERROR;
                return 0;
              }


              if (inChars) {
                if(newCharsLen == 0) {
                  charsOffset = src->current - src->source;
                }
                newCharsLen++;
              } else {
                if(newExtensionsLen == 0) {
                  extensionOffset = src->current - src->source;
                }
                newExtensionsLen++;
              }

              break;
            }
        }

          src->current++;
        }

     EndOfLoop:
      /* Nothing but whitespace was left. */
      if (newStrength == UCOL_TOK_UNSET) {
        return 0;
      }

      /* A relation without any source characters is malformed. */
      if (newCharsLen == 0) {
        *status = U_INVALID_FORMAT_ERROR;
        return 0;
      }
    }

    {
      UColToken *sourceToken = NULL;
      UColToken key;

      key.source = newCharsLen << 24 | charsOffset;
      key.expansion = newExtensionsLen << 24 | extensionOffset;

      /*  4 Lookup each [source,  expansion] in the CharsToToken map, and find a sourceToken */
      sourceToken = (UColToken *)uhash_get(uchars2tokens, &key);

      if(newStrength != UCOL_TOK_RESET) {
        if(lastToken == NULL) { /* this means that rules haven't started properly */
          *status = U_INVALID_FORMAT_ERROR;
          return 0;
        }
      /*  6 Otherwise (when relation != reset) */
        if(sourceToken == NULL) {
          /* If sourceToken is null, create new one, */
          sourceToken = (UColToken *)uprv_malloc(sizeof(UColToken));
          if(sourceToken == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
          }
          sourceToken->source = newCharsLen << 24 | charsOffset;
          sourceToken->expansion = newExtensionsLen << 24 | extensionOffset;

          sourceToken->debugSource = *(src->source + charsOffset);
          if(newExtensionsLen > 0) {
            sourceToken->debugExpansion = *(src->source + extensionOffset);
          } else {
            sourceToken->debugExpansion = 0;
          }


          sourceToken->polarity = UCOL_TOK_POLARITY_POSITIVE; /* TODO: this should also handle reverse */
          sourceToken->next = NULL;
          sourceToken->previous = NULL;
          uhash_put(uchars2tokens, sourceToken, sourceToken, status);
        } else {
          /* we could have fished out a reset here */
          if(sourceToken->strength != UCOL_TOK_RESET) {
            /* otherwise remove sourceToken from where it was. */
            if(sourceToken->next != NULL) {
              sourceToken->next->previous = sourceToken->previous;
            } else {
              sourceToken->listHeader->last[sourceToken->polarity] = sourceToken->previous;
            }

            if(sourceToken->previous != NULL) {
              sourceToken->previous->next = sourceToken->next;
            } else {
              sourceToken->listHeader->first[sourceToken->polarity] = sourceToken->next;
            }

            /* The token is detached now; drop the old links so that the
               head insertion below, which does not set both of them,
               cannot leave pointers into the previous position. */
            sourceToken->next = NULL;
            sourceToken->previous = NULL;
          }
        }

        sourceToken->strength = newStrength;
        sourceToken->listHeader = lastToken->listHeader;
        /*
        1.	Find the strongest strength in each list, and set strongestP and strongestN 
        accordingly in the headers. 
        */
        if(sourceToken->listHeader->strongest[sourceToken->polarity] > sourceToken->strength) {
          sourceToken->listHeader->strongest[sourceToken->polarity] = sourceToken->strength;
        }

        if(lastToken->strength == UCOL_TOK_RESET) {
        /* If LAST is a reset 
              insert sourceToken at the head of either the positive list or the negative 
              list, depending on the polarity of relation. 
              set the polarity of sourceToken to be the same as the list you put it in. */
          if(sourceToken->listHeader->first[sourceToken->polarity] == 0) {
            sourceToken->listHeader->first[sourceToken->polarity] = sourceToken;
            sourceToken->listHeader->last[sourceToken->polarity] = sourceToken;
          } else {
            sourceToken->listHeader->first[sourceToken->polarity]->previous = sourceToken;
            sourceToken->next = sourceToken->listHeader->first[sourceToken->polarity];
            sourceToken->listHeader->first[sourceToken->polarity] = sourceToken;
          }

          /*
            If "xy" doesn't occur earlier in the list or in the UCA, convert &xy * c * 
            d * ... into &x * c/y * d * ... 
          */
          if(expandNext != 0 && sourceToken->expansion == 0) {
            sourceToken->expansion = expandNext;
            sourceToken->debugExpansion = *(src->source + (expandNext & 0xFFFFFF));
            expandNext = 0;
          }

        } else {
        /* Otherwise (when LAST is not a reset) 
              if polarity (LAST) == polarity(relation), insert sourceToken after LAST, 
              otherwise insert before. 
              when inserting after or before, search to the next position with the same 
              strength in that direction. (This is called postpone insertion).         */
          if(lastToken->polarity == sourceToken->polarity) {
            while(lastToken->next != NULL && lastToken->next->strength > sourceToken->strength) {
              lastToken = lastToken->next;
            }
            sourceToken->previous = lastToken;
            if(lastToken->next != NULL) {
              lastToken->next->previous = sourceToken;
            } else {
              sourceToken->listHeader->last[sourceToken->polarity] = sourceToken;
            }

            sourceToken->next = lastToken->next;
            lastToken->next = sourceToken;
          } else {
            while(lastToken->previous != NULL && lastToken->previous->strength > sourceToken->strength) {
              lastToken = lastToken->previous;
            }
            sourceToken->next = lastToken;
            if(lastToken->previous != NULL) {
              lastToken->previous->next = sourceToken;
            } else {
              sourceToken->listHeader->first[sourceToken->polarity] = sourceToken;
            }
            sourceToken->previous = lastToken->previous;
            lastToken->previous = sourceToken;
          }
        }
      } else {
        uint32_t CE = UCOL_NOT_FOUND, SecondCE = UCOL_NOT_FOUND;
        collIterate s;

        /* Everything after the first reset character becomes the
           expansion of the token that follows the reset. */
        if(newCharsLen > 1) {
          expandNext = ((newCharsLen-1)<<24) | (charsOffset + 1);
        } else {
          expandNext = 0;
        }

      /*  5 If the relation is a reset: 
          If sourceToken is null 
            Create new list, create new sourceToken, make the baseCE from source, put 
            the sourceToken in ListHeader of the new list */
        if(sourceToken == NULL) {

          /*
              3. The rule for "& abcdefg < xyz" is a bit tricky. What it turns into is:

              a. Find the longest sequence in "abcdefg" that is in UCA *OR* in the
              tailoring so far. Suppose that is "abcd".
              b. Then treat this rule as equivalent to:
              "& abcd < xyz / efg"
          */
          if(newCharsLen > 1) {
            /* Only the one-character prefix is looked up here. */
            key.source = 0x01000000 | charsOffset;
            sourceToken = (UColToken *)uhash_get(uchars2tokens, &key);
            if(sourceToken != NULL) {
              lastToken = sourceToken;
              continue;
            }
          }

          /* A new reset needs a free list header; refuse to write past
             the end of ListList. */
          if(listPosition >= sizeof(ListList)/sizeof(ListList[0])) {
            *status = U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
          }

          /* do the reset thing */
          sourceToken = (UColToken *)uprv_malloc(sizeof(UColToken));
          if(sourceToken == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
          }
          sourceToken->source = newCharsLen << 24 | charsOffset;
          sourceToken->expansion = newExtensionsLen << 24 | extensionOffset;
          
          sourceToken->debugSource = *(src->source + charsOffset);
          sourceToken->debugExpansion = *(src->source + extensionOffset);


          sourceToken->polarity = UCOL_TOK_POLARITY_POSITIVE; /* TODO: this should also handle reverse */
          sourceToken->strength = UCOL_TOK_RESET;
          sourceToken->next = NULL;
          sourceToken->previous = NULL;
          sourceToken->listHeader = &ListList[listPosition];
          /*
            3 Consider each item: relation, source, and expansion: e.g. ...< x / y ... 
              First convert all expansions into normal form. Examples: 
                If "xy" doesn't occur earlier in the list or in the UCA, convert &xy * c * 
                d * ... into &x * c/y * d * ... 
                Note: reset values can never have expansions, although they can cause the 
                very next item to have one. They may be contractions, if they are found 
                earlier in the list. 
          */
          if(newCharsLen > 1) {
            sourceToken->source = 0x01000000 | charsOffset;
          } 

 
          init_collIterate(src->source+charsOffset, newCharsLen, &s, FALSE);

          CE = ucol_getNextCE(src->UCA, &s, status);
          /*UCOL_GETNEXTCE(CE, src->UCA, s, &status);*/

          SecondCE = ucol_getNextCE(src->UCA, &s, status);
          /*UCOL_GETNEXTCE(SecondCE, src->UCA, s, &status);*/
    
          ListList[listPosition].baseCE = CE;
          ListList[listPosition].first[UCOL_TOK_POLARITY_NEGATIVE] = NULL;
          ListList[listPosition].last[UCOL_TOK_POLARITY_NEGATIVE] = NULL;
          ListList[listPosition].first[UCOL_TOK_POLARITY_POSITIVE] = NULL;
          ListList[listPosition].last[UCOL_TOK_POLARITY_POSITIVE] = NULL;
          ListList[listPosition].strongest[UCOL_TOK_POLARITY_NEGATIVE] = UCOL_TOK_UNSET;
          ListList[listPosition].strongest[UCOL_TOK_POLARITY_POSITIVE] = UCOL_TOK_UNSET;

          ListList[listPosition].reset = sourceToken;

          listPosition++;
          uhash_put(uchars2tokens, sourceToken, sourceToken, status);
        } else { /* reset to something already in rules */
        }
      }
      /*  7 After all this, set LAST to point to sourceToken, and goto step 3. */  
      lastToken = sourceToken;
    }  
  }

  src->lh = ListList;
  src->resultLen = listPosition;

  return listPosition;
}
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`#include "unicode/ustring.h"`

			`#include "cmemory.h"`
ICU-96 more new files. Will try to do unix port now... X-SVN-Rev: 3487 2001-01-29 22:09:24 +00:00			`#include "ucoltok.h"`
			`#include "uhash.h"`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`#include "ucmp32.h"`
ICU-96 more new files. Will try to do unix port now... X-SVN-Rev: 3487 2001-01-29 22:09:24 +00:00
			`static UHashtable *uchars2tokens;`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`static UColTokListHeader ListList[256];`
			`static uint32_t listPosition = 0;`

			`static const UChar *rulesToParse = 0;`

			`/* will use a small structure, tokHash */`

			`U_CAPI int32_t`
			`uhash_hashTokens(const void *k) {`
			`int32_t hash = 0;`
			`if (k != NULL) {`
			`const UColToken key = (const UColToken )k;`
			`int32_t len = (key->source & 0xFF000000)>>24;`
			`int32_t inc = ((len - 32) / 32) + 1;`

			`const UChar *p = (key->source & 0x00FFFFFF) + rulesToParse;`
			`const UChar *limit = p + len;`

			`while (p<limit) {`
			`hash = (hash * 37) + *p;`
			`p += inc;`
			`}`

			`if((len = ((key->expansion & 0xFF000000)>>24)) != 0) {`
			`p = (key->expansion & 0x00FFFFFF) + rulesToParse;`
			`limit = p + len;`
			`while (p<limit) {`
			`hash = (hash * 37) + *p;`
			`p += inc;`
			`}`
			`}`
			`}`
			`return hash;`
			`}`

			`U_CAPI UBool`
			`uhash_compareTokens(const void key1, const void key2) {`
			`const UColToken p1 = (const UColToken) key1;`
			`const UColToken p2 = (const UColToken) key2;`
			`const UChar *s1 = (p1->source & 0x00FFFFFF) + rulesToParse;`
			`const UChar *s2 = (p2->source & 0x00FFFFFF) + rulesToParse;`
			`uint32_t s1L = ((p1->source & 0xFF000000) >> 24);`
			`uint32_t s2L = ((p2->source & 0xFF000000) >> 24);`

			`if (p1 == p2) {`
			`return TRUE;`
			`}`
			`if (p1 == NULL \|\| p2 == NULL) {`
			`return FALSE;`
			`}`
			`if(p1->source == p2->source && p1->expansion == p2->expansion) {`
			`return TRUE;`
			`}`
			`if(s1L != s2L) {`
			`return FALSE;`
			`}`
			`while(s1 < s1+s1L-1 && s1 == s2) {`
			`++s1;`
			`++s2;`
			`}`
			`if(s1 == s2) {`
			`s1 = (p1->expansion & 0x00FFFFFF) + rulesToParse;`
			`s2 = (p2->expansion & 0x00FFFFFF) + rulesToParse;`
			`s1L = ((p1->expansion & 0xFF000000) >> 24);`
			`s2L = ((p2->expansion & 0xFF000000) >> 24);`
			`if(s1L != s2L) {`
			`return FALSE;`
			`}`
			`if(s1L != 0) {`
			`while(s1 < s1+s1L-1 && s1 == s2) {`
			`++s1;`
			`++s2;`
			`}`
			`return (UBool)(s1 == s2);`
			`} else {`
			`return TRUE;`
			`}`
			`} else {`
			`return FALSE;`
			`}`
			`}`
ICU-96 more new files. Will try to do unix port now... X-SVN-Rev: 3487 2001-01-29 22:09:24 +00:00
			`void deleteElement(void *element) {`
			`/*`
			`UCAElements el = (UCAElements )element;`

			`int32_t i = 0;`
			`for(i = 0; i < el->noOfCEs; i++) {`
			`free(el->primary[i]);`
			`free(el->secondary[i]);`
			`free(el->tertiary[i]);`
			`}`
			`free(el);`
			`*/`
			`}`

ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`void ucol_tok_initTokenList(UColTokenParser src, UErrorCode status) {`
ICU-96 more new files. Will try to do unix port now... X-SVN-Rev: 3487 2001-01-29 22:09:24 +00:00			`if(U_FAILURE(*status)) {`
			`return;`
			`}`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`rulesToParse = src->source;`
			`uchars2tokens = uhash_open(uhash_hashTokens, uhash_compareTokens, status);`
ICU-96 more new files. Will try to do unix port now... X-SVN-Rev: 3487 2001-01-29 22:09:24 +00:00			`uhash_setValueDeleter(uchars2tokens, deleteElement);`
			`}`

			`UColToken *ucol_tok_open() {`
			`return NULL;`
			`}`

ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`#define UCOL_TOK_UNSET 0xFFFFFFFF`
			`#define UCOL_TOK_RESET 0xDEADBEEF`
ICU-96 more new files. Will try to do unix port now... X-SVN-Rev: 3487 2001-01-29 22:09:24 +00:00
			`/*`
			`Processing Description`
			`1 Build a ListList. Each list has a header, which contains two lists (positive`
			`and negative), a reset token, a baseCE, nextCE, and previousCE. The lists and`
			`reset may be null.`
			`2 As you process, you keep a LAST pointer that points to the last token you`
			`handled.`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`*/`
ICU-96 more new files. Will try to do unix port now... X-SVN-Rev: 3487 2001-01-29 22:09:24 +00:00
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`uint32_t ucol_tok_assembleTokenList(UColTokenParser src, UErrorCode status) {`
			`UColToken *lastToken = NULL;`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`uint32_t newCharsLen = 0, newExtensionsLen = 0;`
			`uint32_t charsOffset = 0, extensionOffset = 0;`
ICU-96 token list generation should work now, moving to CE generation X-SVN-Rev: 3514 2001-01-31 23:12:37 +00:00			`uint32_t expandNext = 0;`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00
			`uint32_t newStrength = UCOL_TOK_UNSET;`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00
			`ucol_tok_initTokenList(src, status);`

			`while(src->current < src->end) {`
			`{ /* parsing part */`

			`UBool inChars = TRUE;`
			`UBool inQuote = FALSE;`

ICU-96 some debug stuff, avoid dead loop X-SVN-Rev: 3510 2001-01-31 21:10:55 +00:00			`newStrength = UCOL_TOK_UNSET;`
			`newCharsLen = 0; newExtensionsLen = 0;`
			`charsOffset = 0; extensionOffset = 0;`

ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`while (src->current < src->end) {`
			`UChar ch = *(src->current);`

			`if (inQuote) {`
			`if (ch == 0x0027/'\''/) {`
			`inQuote = FALSE;`
			`} else {`
			`if ((newCharsLen == 0) \|\| inChars) {`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`if(newCharsLen == 0) {`
			`charsOffset = src->current - src->source;`
			`}`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`newCharsLen++;`
			`} else {`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`if(newExtensionsLen == 0) {`
			`extensionOffset = src->current - src->source;`
			`}`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`newExtensionsLen++;`
			`}`
			`}`
			`} else {`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`/* Sets the strength for this entry */`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`switch (ch) {`
			`case 0x003D/'='/ :`
			`if (newStrength != -1) {`
			`goto EndOfLoop;`
			`}`

			`newStrength = UCOL_IDENTICAL;`
			`break;`

			`case 0x002C/','/:`
			`if (newStrength != UCOL_TOK_UNSET) {`
			`goto EndOfLoop;`
			`}`

			`newStrength = UCOL_TERTIARY;`
			`break;`

			`case 0x003B/';'/:`
			`if (newStrength != UCOL_TOK_UNSET) {`
			`goto EndOfLoop;`
			`}`

			`newStrength = UCOL_SECONDARY;`
			`break;`

			`case 0x003C/'<'/:`
			`if (newStrength != UCOL_TOK_UNSET) {`
			`goto EndOfLoop;`
			`}`

			`newStrength = UCOL_PRIMARY;`
			`break;`

			`case 0x0026/'&'/:`
			`if (newStrength != UCOL_TOK_UNSET) {`
			`goto EndOfLoop;`
			`}`

			`newStrength = UCOL_TOK_RESET; /* PatternEntry::RESET = 0 */`
			`break;`

ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`/* Ignore the white spaces */`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`case 0x0009/'\t'/:`
			`case 0x000C/'\f'/:`
			`case 0x000D/'\r'/:`
			`case 0x000A/'\n'/:`
			`case 0x0020/' '/:`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`break; /* skip whitespace TODO use Unicode */`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00
			`case 0x002F/'/'/:`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`/* This entry has an extension. */`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`inChars = FALSE;`
			`break;`

			`case 0x0027/'\''/:`
			`inQuote = TRUE;`
			`ch = (++(src->current)); /pattern[++index]; */`

			`if (newCharsLen == 0) {`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`charsOffset = src->current - src->source;`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`newCharsLen++;`
			`} else if (inChars) {`
ICU-96 some debug stuff, avoid dead loop X-SVN-Rev: 3510 2001-01-31 21:10:55 +00:00			`if(newCharsLen == 0) {`
			`charsOffset = src->current - src->source;`
			`}`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`newCharsLen++;`
			`} else {`
			`newExtensionsLen++;`
			`}`

			`break;`

			`default:`
			`if (newStrength == UCOL_TOK_UNSET) {`
			`*status = U_INVALID_FORMAT_ERROR;`
			`return 0;`
			`}`

			`if (ucol_tok_isSpecialChar(ch) && (inQuote == FALSE)) {`
			`*status = U_INVALID_FORMAT_ERROR;`
			`return 0;`
			`}`



			`if (inChars) {`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`if(newCharsLen == 0) {`
			`charsOffset = src->current - src->source;`
			`}`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`newCharsLen++;`
			`} else {`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`if(newExtensionsLen == 0) {`
			`extensionOffset = src->current - src->source;`
			`}`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`newExtensionsLen++;`
			`}`

			`break;`
			`}`
			`}`

			`src->current++;`
			`}`

			`EndOfLoop:`
			`if (newStrength == -1) {`
			`return 0;`
			`}`

			`if (newCharsLen == 0) {`
			`*status = U_INVALID_FORMAT_ERROR;`
			`return 0;`
			`}`
			`}`

			`{`
			`UColToken *sourceToken = NULL;`
			`UColToken key;`

			`key.source = newCharsLen << 24 \| charsOffset;`
			`key.expansion = newExtensionsLen << 24 \| extensionOffset;`

			`/* 4 Lookup each [source, expansion] in the CharsToToken map, and find a sourceToken */`
			`sourceToken = (UColToken *)uhash_get(uchars2tokens, &key);`
ICU-96 some debug stuff, avoid dead loop X-SVN-Rev: 3510 2001-01-31 21:10:55 +00:00
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`if(newStrength != UCOL_TOK_RESET) {`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`if(lastToken == NULL) { /* this means that rules haven't started properly */`
			`*status = U_INVALID_FORMAT_ERROR;`
			`return 0;`
			`}`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`/* 6 Otherwise (when relation != reset) */`
			`if(sourceToken == NULL) {`
			`/* If sourceToken is null, create new one, */`
			`sourceToken = (UColToken *)uprv_malloc(sizeof(UColToken));`
			`sourceToken->source = newCharsLen << 24 \| charsOffset;`
			`sourceToken->expansion = newExtensionsLen << 24 \| extensionOffset;`
ICU-96 some debug stuff, avoid dead loop X-SVN-Rev: 3510 2001-01-31 21:10:55 +00:00
			`sourceToken->debugSource = *(src->source + charsOffset);`
ICU-96 token list generation should work now, moving to CE generation X-SVN-Rev: 3514 2001-01-31 23:12:37 +00:00			`if(newExtensionsLen > 0) {`
			`sourceToken->debugExpansion = *(src->source + extensionOffset);`
			`} else {`
			`sourceToken->debugExpansion = 0;`
			`}`

ICU-96 some debug stuff, avoid dead loop X-SVN-Rev: 3510 2001-01-31 21:10:55 +00:00
ICU-96 token list generation should work now, moving to CE generation X-SVN-Rev: 3514 2001-01-31 23:12:37 +00:00			`sourceToken->polarity = UCOL_TOK_POLARITY_POSITIVE; /* TODO: this should also handle reverse */`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`sourceToken->next = NULL;`
			`sourceToken->previous = NULL;`
			`uhash_put(uchars2tokens, sourceToken, sourceToken, status);`
			`} else {`
			`/* we could have fished out a reset here */`
			`if(sourceToken->strength != UCOL_TOK_RESET) {`
			`/* otherwise remove sourceToken from where it was. */`
			`if(sourceToken->next != NULL) {`
			`sourceToken->next->previous = sourceToken->previous;`
			`} else {`
			`sourceToken->listHeader->last[sourceToken->polarity] = sourceToken->previous;`
			`}`

			`if(sourceToken->previous != NULL) {`
			`sourceToken->previous->next = sourceToken->next;`
			`} else {`
			`sourceToken->listHeader->first[sourceToken->polarity] = sourceToken->next;`
			`}`
			`}`
			`}`

			`sourceToken->strength = newStrength;`
ICU-96 token list generation should work now, moving to CE generation X-SVN-Rev: 3514 2001-01-31 23:12:37 +00:00			`sourceToken->listHeader = lastToken->listHeader;`
			`/*`
			`1. Find the strongest strength in each list, and set strongestP and strongestN`
			`accordingly in the headers.`
			`*/`
			`if(sourceToken->listHeader->strongest[sourceToken->polarity] > sourceToken->strength) {`
			`sourceToken->listHeader->strongest[sourceToken->polarity] = sourceToken->strength;`
			`}`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00
			`if(lastToken->strength == UCOL_TOK_RESET) {`
			`/* If LAST is a reset`
			`insert sourceToken at the head of either the positive list or the negative`
			`list, depending on the polarity of relation.`
			`set the polarity of sourceToken to be the same as the list you put it in. */`
ICU-96 token list generation should work now, moving to CE generation X-SVN-Rev: 3514 2001-01-31 23:12:37 +00:00			`if(sourceToken->listHeader->first[sourceToken->polarity] == 0) {`
			`sourceToken->listHeader->first[sourceToken->polarity] = sourceToken;`
			`sourceToken->listHeader->last[sourceToken->polarity] = sourceToken;`
			`} else {`
			`sourceToken->listHeader->first[sourceToken->polarity]->previous = sourceToken;`
			`sourceToken->next = sourceToken->listHeader->first[sourceToken->polarity];`
			`sourceToken->listHeader->first[sourceToken->polarity] = sourceToken;`
			`}`

ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`/*`
			`If "xy" doesn't occur earlier in the list or in the UCA, convert &xy * c *`
			`d * ... into &x * c/y * d * ...`
			`*/`
ICU-96 token list generation should work now, moving to CE generation X-SVN-Rev: 3514 2001-01-31 23:12:37 +00:00			`if(expandNext != 0 && sourceToken->expansion == 0) {`
			`sourceToken->expansion = expandNext;`
			`sourceToken->debugExpansion = *(src->source + (expandNext & 0xFFFFFF));`
			`expandNext = 0;`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`}`

ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`} else {`
			`/* Otherwise (when LAST is not a reset)`
			`if polarity (LAST) == polarity(relation), insert sourceToken after LAST,`
			`otherwise insert before.`
			`when inserting after or before, search to the next position with the same`
			`strength in that direction. (This is called postpone insertion). */`
			`if(lastToken->polarity == sourceToken->polarity) {`
			`while(lastToken->next != NULL && lastToken->next->strength > sourceToken->strength) {`
			`lastToken = lastToken->next;`
			`}`
			`sourceToken->previous = lastToken;`
			`if(lastToken->next != NULL) {`
			`lastToken->next->previous = sourceToken;`
			`} else {`
			`sourceToken->listHeader->last[sourceToken->polarity] = sourceToken;`
			`}`

			`sourceToken->next = lastToken->next;`
			`lastToken->next = sourceToken;`
			`} else {`
			`while(lastToken->previous != NULL && lastToken->previous->strength > sourceToken->strength) {`
			`lastToken = lastToken->previous;`
			`}`
			`sourceToken->next = lastToken;`
			`if(lastToken->previous != NULL) {`
			`lastToken->previous->next = sourceToken;`
			`} else {`
			`sourceToken->listHeader->first[sourceToken->polarity] = sourceToken;`
			`}`
			`sourceToken->previous = lastToken->previous;`
			`lastToken->previous = sourceToken;`
			`}`
			`}`
			`} else {`
ICU-96 some debug stuff, avoid dead loop X-SVN-Rev: 3510 2001-01-31 21:10:55 +00:00			`uint32_t CE = UCOL_NOT_FOUND, SecondCE = UCOL_NOT_FOUND;`
			`collIterate s;`

ICU-96 token list generation should work now, moving to CE generation X-SVN-Rev: 3514 2001-01-31 23:12:37 +00:00			`if(newCharsLen > 1) {`
			`expandNext = ((newCharsLen-1)<<24) \| (charsOffset + 1);`
			`} else {`
			`expandNext = 0;`
			`}`

ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`/* 5 If the relation is a reset:`
			`If sourceToken is null`
			`Create new list, create new sourceToken, make the baseCE from source, put`
			`the sourceToken in ListHeader of the new list */`
			`if(sourceToken == NULL) {`
ICU-96 more collation X-SVN-Rev: 3559 2001-02-05 05:36:12 +00:00
			`/*`
			`3. The rule for "& abcdefg < xyz" is a bit tricky. What it turns into is:`

			`a. Find the longest sequence in "abcdefg" that is in UCA OR in the`
			`tailoring so far. Suppose that is "abcd".`
			`b. Then treat this rule as equivalent to:`
			`"& abcd < xyz / efg"`
			`*/`
ICU-96 some debug stuff, avoid dead loop X-SVN-Rev: 3510 2001-01-31 21:10:55 +00:00			`if(newCharsLen > 1) {`
			`key.source = 0x01000000 \| charsOffset;`
			`sourceToken = (UColToken *)uhash_get(uchars2tokens, &key);`
			`if(sourceToken != NULL) {`
			`lastToken = sourceToken;`
			`continue;`
			`}`
			`}`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`/* do the reset thing */`
			`sourceToken = (UColToken *)uprv_malloc(sizeof(UColToken));`
			`sourceToken->source = newCharsLen << 24 \| charsOffset;`
			`sourceToken->expansion = newExtensionsLen << 24 \| extensionOffset;`
ICU-96 some debug stuff, avoid dead loop X-SVN-Rev: 3510 2001-01-31 21:10:55 +00:00
			`sourceToken->debugSource = *(src->source + charsOffset);`
			`sourceToken->debugExpansion = *(src->source + extensionOffset);`


ICU-96 token list generation should work now, moving to CE generation X-SVN-Rev: 3514 2001-01-31 23:12:37 +00:00			`sourceToken->polarity = UCOL_TOK_POLARITY_POSITIVE; /* TODO: this should also handle reverse */`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00			`sourceToken->strength = UCOL_TOK_RESET;`
			`sourceToken->next = NULL;`
			`sourceToken->previous = NULL;`
			`sourceToken->listHeader = &ListList[listPosition];`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00			`/*`
			`3 Consider each item: relation, source, and expansion: e.g. ...< x / y ...`
			`First convert all expansions into normal form. Examples:`
			`If "xy" doesn't occur earlier in the list or in the UCA, convert &xy * c *`
			`d * ... into &x * c/y * d * ...`
			`Note: reset values can never have expansions, although they can cause the`
			`very next item to have one. They may be contractions, if they are found`
			`earlier in the list.`
			`*/`
			`if(newCharsLen > 1) {`
ICU-96 token list generation should work now, moving to CE generation X-SVN-Rev: 3514 2001-01-31 23:12:37 +00:00			`sourceToken->source = 0x01000000 \| charsOffset;`
			`}`
ICU-96 intermediate commit for George to build X-SVN-Rev: 3507 2001-01-31 20:17:12 +00:00
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00
			`init_collIterate(src->source+charsOffset, newCharsLen, &s, FALSE);`

			`CE = ucol_getNextCE(src->UCA, &s, status);`
			`/*UCOL_GETNEXTCE(CE, src->UCA, s, &status);*/`

			`SecondCE = ucol_getNextCE(src->UCA, &s, status);`
			`/*UCOL_GETNEXTCE(SecondCE, src->UCA, s, &status);*/`

			`ListList[listPosition].baseCE = CE;`
ICU-96 token list generation should work now, moving to CE generation X-SVN-Rev: 3514 2001-01-31 23:12:37 +00:00			`ListList[listPosition].first[UCOL_TOK_POLARITY_NEGATIVE] = NULL;`
			`ListList[listPosition].last[UCOL_TOK_POLARITY_NEGATIVE] = NULL;`
			`ListList[listPosition].first[UCOL_TOK_POLARITY_POSITIVE] = NULL;`
			`ListList[listPosition].last[UCOL_TOK_POLARITY_POSITIVE] = NULL;`
			`ListList[listPosition].strongest[UCOL_TOK_POLARITY_NEGATIVE] = UCOL_TOK_UNSET;`
			`ListList[listPosition].strongest[UCOL_TOK_POLARITY_POSITIVE] = UCOL_TOK_UNSET;`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00
			`ListList[listPosition].reset = sourceToken;`

			`listPosition++;`
			`uhash_put(uchars2tokens, sourceToken, sourceToken, status);`
			`} else { /* reset to something already in rules */`
			`}`
			`}`
			`/* 7 After all this, set LAST to point to sourceToken, and goto step 3. */`
			`lastToken = sourceToken;`
			`}`
			`}`

			`src->lh = ListList;`
ICU-96 more collation X-SVN-Rev: 3559 2001-02-05 05:36:12 +00:00			`src->resultLen = listPosition;`
ICU-96 token list, CE searching are in X-SVN-Rev: 3498 2001-01-31 07:20:56 +00:00
			`return listPosition;`
ICU-96 more new files. Will try to do unix port now... X-SVN-Rev: 3487 2001-01-29 22:09:24 +00:00			`}`
No results found.