/* ******************************************************************************* * * Copyright (C) 2003, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * * File line.h * * Modification History: * * Date Name Description * 07/07/2003 weiv Creation. ******************************************************************************* */ // // class Line // // Each line from the source file (containing a name, presumably) gets // one of these structs. // #include "strengthprobe.h" StrengthProbe::StrengthProbe(CompareFn comparer, GetSortKeyFn getter, UChar SE, UChar B0, UChar B1, UChar B2, UChar B3) : SE(SE), B0(B0), B1(B1), B2(B2), B3(B3), utilFirstP(&utilFirst), utilSecondP(&utilSecond), frenchSecondary(FALSE), comparer(comparer), skgetter(getter) { } int StrengthProbe::setProbeChars(UChar B0, UChar B1, UChar B2, UChar B3) { this->B0 = B0; this->B1 = B1; this->B2 = B2; this-> B3 = B3; return checkSanity(); } int StrengthProbe::checkSanity() { int sanityRes; utilFirst.setTo(B0); utilSecond.setTo(B3); if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { return sanityRes*10 + 3; } utilSecond.setTo(B2); if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { return sanityRes*10 + 2; } utilSecond.setTo(B1); if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { return sanityRes*10 + 1; } utilFirst.setTo(B3); utilSecond.setTo(B2); if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { return sanityRes*10 + 5; } utilSecond.setTo(B1); if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { return sanityRes*10 + 4; } utilFirst.setTo(B2); if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) { return sanityRes*10 + 6; } utilFirst.setTo(B0); if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { return 1000; } utilFirst.setTo(B1); if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { return 1001; } utilFirst.setTo(B2); if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { return 1002; } utilFirst.setTo(B3); if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) { return 1003; } return 0; } UBool StrengthProbe::probePrefix(const Line &x, const Line &y, UChar first, UChar second) { utilFirst.name[0] = first; utilFirst.name[1] = SE; u_strcpy(utilFirst.name+2, x.name); utilFirst.name[x.len+2] = 0; utilFirst.len = x.len+2; utilSecond.name[0] = second; utilSecond.name[1] = SE; u_strcpy(utilSecond.name+2, y.name); utilSecond.name[y.len+2] = 0; utilSecond.len = y.len+2; if(comparer(&utilFirstP, &utilSecondP) < 0) { return TRUE; } else { return FALSE; } } UBool StrengthProbe::probeSuffix(const Line &x, const Line &y, UChar first, UChar second) { u_strcpy(utilFirst.name, x.name); utilFirst.name[x.len] = SE; utilFirst.name[x.len+1] = first; utilFirst.name[x.len+2] = 0; utilFirst.len = x.len + 2; u_strcpy(utilSecond.name, y.name); utilSecond.name[y.len] = SE; utilSecond.name[y.len+1] = second; utilSecond.name[y.len+2] = 0; utilSecond.len = y.len + 2; if(comparer(&utilFirstP, &utilSecondP) < 0) { return TRUE; } else { return FALSE; } } UBool StrengthProbe::probePrefixNoSep(const Line &x, const Line &y, UChar first, UChar second) { utilFirst.name[0] = first; u_strcpy(utilFirst.name+1, x.name); utilFirst.name[x.len+1] = 0; utilFirst.len = x.len + 1; utilSecond.name[0] = second; u_strcpy(utilSecond.name+1, y.name); utilSecond.name[y.len+1] = 0; utilSecond.len = y.len + 1; if(comparer(&utilFirstP, &utilSecondP) < 0) { return TRUE; } else { return FALSE; } } UBool StrengthProbe::probeSuffixNoSep(const Line &x, const Line &y, UChar first, UChar second) { u_strcpy(utilFirst.name, x.name); utilFirst.name[x.len] = first; utilFirst.name[x.len+1] = 0; utilFirst.len = x.len + 1; u_strcpy(utilSecond.name, y.name); utilSecond.name[y.len] = second; utilSecond.name[y.len+1] = 0; utilSecond.len = y.len + 1; if(comparer(&utilFirstP, &utilSecondP) < 0) { return TRUE; } else { return FALSE; } } UColAttributeValue StrengthProbe::getStrength(const Line &x, const Line &y) { const Line *xp = &x; const Line *yp = &y; Line empty; Line *emptyP = ∅ if(comparer(&emptyP, &xp) == 0) { return distanceFromEmptyString(y); } int32_t result = comparer(&xp, &yp); if(result == 0) { return UCOL_IDENTICAL; } else if(result > 0) { return UCOL_OFF; // bad situation } else { // we need to probe strength if(probeSuffix(x, y, B1, B0)) { //if(probePrefix(x, y, B2, B0)) { // swamps secondary difference return UCOL_PRIMARY; } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference return UCOL_SECONDARY; } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference return UCOL_TERTIARY; } else if(!probePrefix(x, y, B3, B0)) { return UCOL_QUATERNARY; } /* //if(probeSuffix(x, y, B1, B0)) { if(probePrefix(x, y, B2, B0)) { // swamps secondary difference return UCOL_PRIMARY; } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference return UCOL_SECONDARY; } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference return UCOL_TERTIARY; } else if(!probePrefix(x, y, B3, B0)) { return UCOL_QUATERNARY; } */ } return UCOL_OFF; // bad } UColAttributeValue StrengthProbe::getStrength(const UnicodeString &sx, const UnicodeString &sy) { Line x(sx); Line y(sy); return getStrength(x, y); } int32_t StrengthProbe::compare(const UnicodeString &sx, const UnicodeString &sy) { Line x(sx); Line y(sy); const Line *xp = &x; const Line *yp = &y; return comparer(&xp, &yp); } int32_t StrengthProbe::compare(const Line &x, const Line &y) { const Line *xp = &x; const Line *yp = &y; return comparer(&xp, &yp); } UColAttributeValue StrengthProbe::distanceFromEmptyString(const Line &x) { if(x.name[0] == 0x30D) { int32_t putBreakPointHere = 0; } Line empty; Line *emptyP = ∅ uint8_t buff[256]; getSortKey(empty.name, empty.len, buff, 256); Line B0Line(B0); Line *B0LineP = &B0Line; const Line *xp = &x; int32_t result = comparer(&emptyP, &xp); if(result == 0) { return UCOL_IDENTICAL; } else if(result > 0) { return UCOL_OFF; } result = comparer(&B0LineP, &xp); if(result <= 0) { return UCOL_PRIMARY; } Line sexb0(SE); sexb0.append(x.name, x.len); sexb0.append(B0); Line seb0(SE); seb0.append(B0); uint8_t seb0K[256]; uint8_t sexb0K[256]; uint8_t seb2K[256]; uint8_t seb3K[256]; memset(seb0K, 0, 256); memset(sexb0K, 0, 256); memset(seb2K, 0, 256); memset(seb3K, 0, 256); getSortKey(seb0, seb0K, 256); getSortKey(sexb0, sexb0K, 256); if(compare(seb0, sexb0) <= 0) { Line seb2(SE); seb2.append(B2); getSortKey(seb2, seb2K, 256); result = compare(seb2, sexb0); if((result <= 0 && !frenchSecondary) || (result >= 0 && frenchSecondary)) { // swamps tertiary difference return UCOL_SECONDARY; } Line seb3(SE); seb3.append(B3); getSortKey(seb3, seb3K, 256); if(compare(seb3, sexb0) < 0) { return UCOL_TERTIARY; } return UCOL_QUATERNARY; } else { // if this was UCA, we would have a primary difference. // however, this might not be so, since not everybody // makes well formed CEs. // in cs_CZ on linux, space is tertiary ignorable, but // its quaternary level strength is lower than quad // strengths for non-ignorables. oh well, more testing // required // I think that we can only have quaternary difference // here (in addition to primary difference). //if(!probePrefix(x, empty, B3, B0)) { //return UCOL_QUATERNARY; //} else { return UCOL_PRIMARY; //} } } UColAttributeValue StrengthProbe::distanceFromEmptyString(const UnicodeString &x) { const Line xp(x); return distanceFromEmptyString(xp); } UColAttributeValue StrengthProbe::getPrefixedStrength(const Line &prefix, const Line &x, const Line &y) { contractionUtilFirst.setToConcat(&prefix, &x); contractionUtilSecond.setToConcat(&prefix, &y); return getStrength(contractionUtilFirst, contractionUtilSecond); } StrengthProbe::StrengthProbe(const StrengthProbe &that) { *this = that; } StrengthProbe & StrengthProbe::operator=(const StrengthProbe &that) { if(this != &that) { B0 = that.B0; B1 = that.B1; B2 = that.B2; B3 = that.B3; SE = that.SE; frenchSecondary = that.frenchSecondary; comparer = that.comparer; skgetter = that.skgetter; utilFirstP = &utilFirst; utilSecondP = &utilSecond; } return *this; } UBool StrengthProbe::isFrenchSecondary(UErrorCode &status) { utilFirst.setTo(B0); utilFirst.append(SE); utilFirst.append(B2); utilSecond.setTo(B2); utilSecond.append(SE); utilSecond.append(B0); int32_t result = compare(utilFirst, utilSecond); if(result < 0) { return FALSE; } else if(result > 0) { frenchSecondary = TRUE; return TRUE; } else { status = U_INTERNAL_PROGRAM_ERROR; return FALSE; } } UBool StrengthProbe::isUpperFirst(UErrorCode &status) { UChar i = 0; int32_t result = 0; int32_t upper = 0, lower = 0, equal = 0; for(i = 0x41; i < 0x5B; i++) { utilFirst.setTo(i); utilSecond.setTo(i+0x20); result = compare(utilFirst, utilSecond); if(result < 0) { upper++; } else if(result > 0) { lower++; } else { equal++; } } if(lower == 0 && equal == 0) { return TRUE; } if(upper == 0 && equal == 0) { return FALSE; } status = U_INTERNAL_PROGRAM_ERROR; return FALSE; }