ICU-20976 GCC 8 fixes phase 1

Some initial fixes for GCC 8

- setup a GCC 8 buildbot with -Wextra
- rewrite ucol_sit to use CharString
- workaround for gcc7+ on mac
  see https://github.com/arbor-sim/arbor/issues/562#issuecomment-409970434
- fix ucnv_2022 strcpy site
This commit is contained in:
Steven R. Loomis 2020-03-20 14:41:01 -07:00 committed by Steven R. Loomis
parent 1e24bcd721
commit 2ced2624fc
4 changed files with 90 additions and 67 deletions

View File

@ -39,6 +39,24 @@ matrix:
after_failure:
- cat `find out/junit-results -name "*.txt" -exec grep -l FAILED {} \;`
- name: "c: linux gcc 8 Wextra"
dist: bionic
language: cpp
addons:
apt:
packages:
- g++-8
env: PREFIX=/tmp/icu-prefix CC=gcc-8 CXX=g++-8 CXXFLAGS="-Wextra"
before_script:
- mkdir build
- cd build
- ../icu4c/source/runConfigureICU Linux --disable-debug --enable-release --disable-layoutex --prefix="${PREFIX}" || (tail -100 config.log ; false)
- make -j2
script:
- make -j2 check
- make install
- PATH="${PREFIX}/bin:$PATH" make -C test/hdrtst check
# gcc debug build.
# Includes dependency checker.
# Note - the dependency checker needs to be run on both a debug and an optimized build.

View File

@ -478,7 +478,7 @@ setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConv
static void U_CALLCONV
_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
char myLocale[6]={' ',' ',' ',' ',' ',' '};
char myLocale[7]={' ',' ',' ',' ',' ',' ', '\0'};
cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
if(cnv->extraInfo != NULL) {
@ -493,7 +493,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
myConverterData->currentType = ASCII1;
cnv->fromUnicodeStatus =FALSE;
if(pArgs->locale){
uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale));
uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale)-1);
}
version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
myConverterData->version = version;

View File

@ -68,4 +68,10 @@
# define _STDC_C99
#endif
#if !defined _POSIX_C_SOURCE && \
defined(__APPLE__) && defined(__MACH__) && !defined(__clang__)
// Needed to prevent EOWNERDEAD issues with GCC on Mac
#define _POSIX_C_SOURCE 200809L
#endif
#endif /* __UPOSIXDEFS_H__ */

View File

@ -24,6 +24,10 @@
#include "cstring.h"
#include "uresimp.h"
#include "unicode/coll.h"
#include "unicode/stringpiece.h"
#include "charstr.h"
U_NAMESPACE_USE
#ifdef UCOL_TRACE_SIT
# include <stdio.h>
@ -82,7 +86,6 @@ static const char providerKeyword[] = "@sp=";
static const int32_t locElementCount = UCOL_SIT_LOCELEMENT_MAX+1;
static const int32_t locElementCapacity = 32;
static const int32_t loc3066Capacity = 256;
static const int32_t locProviderCapacity = 10;
static const int32_t internalBufferSize = 512;
/* structure containing specification of a collator. Initialized
@ -90,20 +93,30 @@ static const int32_t internalBufferSize = 512;
* collator instance
*/
struct CollatorSpec {
char locElements[locElementCount][locElementCapacity];
char locale[loc3066Capacity];
char provider[locProviderCapacity];
inline CollatorSpec();
CharString locElements[locElementCount];
CharString locale;
UColAttributeValue options[UCOL_ATTRIBUTE_COUNT];
uint32_t variableTopValue;
UChar variableTopString[locElementCapacity];
int32_t variableTopStringLen;
UBool variableTopSet;
struct {
const char *start;
int32_t len;
} entries[UCOL_SIT_ITEMS_COUNT];
CharString entries[UCOL_SIT_ITEMS_COUNT];
};
CollatorSpec::CollatorSpec() :
locale(),
variableTopValue(0),
variableTopString(),
variableTopSet(FALSE)
{
// set collation options to default
for(int32_t i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
options[i] = UCOL_DEFAULT;
}
}
/* structure for converting between character attribute
* representation and real collation attribute value.
@ -156,18 +169,13 @@ static const char* U_CALLCONV
_processLocaleElement(CollatorSpec *spec, uint32_t value, const char* string,
UErrorCode *status)
{
int32_t len = 0;
do {
if(value == UCOL_SIT_LANGUAGE || value == UCOL_SIT_KEYWORD || value == UCOL_SIT_PROVIDER) {
spec->locElements[value][len++] = uprv_tolower(*string);
spec->locElements[value].append(uprv_tolower(*string), *status);
} else {
spec->locElements[value][len++] = *string;
spec->locElements[value].append(*string, *status);
}
} while(*(++string) != '_' && *string && len < locElementCapacity);
if(len >= locElementCapacity) {
*status = U_BUFFER_OVERFLOW_ERROR;
return string;
}
} while(*(++string) != '_' && *string && U_SUCCESS(*status));
// don't skip the underscore at the end
return string;
}
@ -185,7 +193,7 @@ _processRFC3066Locale(CollatorSpec *spec, uint32_t, const char* string,
*status = U_BUFFER_OVERFLOW_ERROR;
return string;
} else {
uprv_strncpy(spec->locale, string, end-string);
spec->locale.copyFrom(CharString(string, end-string, *status), *status);
return end+1;
}
}
@ -306,9 +314,12 @@ const char* ucol_sit_readOption(const char *start, CollatorSpec *spec,
for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) {
if(*start == options[i].optionStart) {
spec->entries[i].start = start;
const char* end = options[i].action(spec, options[i].attr, start+1, status);
spec->entries[i].len = (int32_t)(end - start);
#ifdef UCOL_TRACE_SIT
fprintf(stderr, "***Set %d to %s...\n", i, start);
#endif
// assume 'start' does not go away through all this
spec->entries[i].copyFrom(CharString(start, (int32_t)(end - start), *status), *status);
return end;
}
}
@ -319,18 +330,6 @@ const char* ucol_sit_readOption(const char *start, CollatorSpec *spec,
return start;
}
static
void ucol_sit_initCollatorSpecs(CollatorSpec *spec)
{
// reset everything
uprv_memset(spec, 0, sizeof(CollatorSpec));
// set collation options to default
int32_t i = 0;
for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
spec->options[i] = UCOL_DEFAULT;
}
}
static const char*
ucol_sit_readSpecs(CollatorSpec *s, const char *string,
UParseError *parseError, UErrorCode *status)
@ -357,25 +356,25 @@ int32_t ucol_sit_dumpSpecs(CollatorSpec *s, char *destination, int32_t capacity,
char optName;
if(U_SUCCESS(*status)) {
for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) {
if(s->entries[i].start) {
if(!s->entries[i].isEmpty()) {
if(len) {
if(len < capacity) {
uprv_strcat(destination, "_");
}
len++;
}
optName = *(s->entries[i].start);
optName = s->entries[i][0];
if(optName == languageArg || optName == regionArg || optName == variantArg || optName == keywordArg) {
for(j = 0; j < s->entries[i].len; j++) {
for(j = 0; j < s->entries[i].length(); j++) {
if(len + j < capacity) {
destination[len+j] = uprv_toupper(*(s->entries[i].start+j));
destination[len+j] = uprv_toupper(s->entries[i][j]);
}
}
len += s->entries[i].len;
len += s->entries[i].length();
} else {
len += s->entries[i].len;
len += s->entries[i].length();
if(len < capacity) {
uprv_strncat(destination,s->entries[i].start, s->entries[i].len);
uprv_strncat(destination,s->entries[i].data(), s->entries[i].length());
}
}
}
@ -387,40 +386,40 @@ int32_t ucol_sit_dumpSpecs(CollatorSpec *s, char *destination, int32_t capacity,
}
static void
ucol_sit_calculateWholeLocale(CollatorSpec *s) {
ucol_sit_calculateWholeLocale(CollatorSpec *s, UErrorCode &status) {
// put the locale together, unless we have a done
// locale
if(s->locale[0] == 0) {
if(s->locale.isEmpty()) {
// first the language
uprv_strcat(s->locale, s->locElements[UCOL_SIT_LANGUAGE]);
s->locale.append(s->locElements[UCOL_SIT_LANGUAGE], status);
// then the script, if present
if(*(s->locElements[UCOL_SIT_SCRIPT])) {
uprv_strcat(s->locale, "_");
uprv_strcat(s->locale, s->locElements[UCOL_SIT_SCRIPT]);
if(!s->locElements[UCOL_SIT_SCRIPT].isEmpty()) {
s->locale.append("_", status);
s->locale.append(s->locElements[UCOL_SIT_SCRIPT], status);
}
// then the region, if present
if(*(s->locElements[UCOL_SIT_REGION])) {
uprv_strcat(s->locale, "_");
uprv_strcat(s->locale, s->locElements[UCOL_SIT_REGION]);
} else if(*(s->locElements[UCOL_SIT_VARIANT])) { // if there is a variant, we need an underscore
uprv_strcat(s->locale, "_");
if(!s->locElements[UCOL_SIT_REGION].isEmpty()) {
s->locale.append("_", status);
s->locale.append(s->locElements[UCOL_SIT_REGION], status);
} else if(!s->locElements[UCOL_SIT_VARIANT].isEmpty()) { // if there is a variant, we need an underscore
s->locale.append("_", status);
}
// add variant, if there
if(*(s->locElements[UCOL_SIT_VARIANT])) {
uprv_strcat(s->locale, "_");
uprv_strcat(s->locale, s->locElements[UCOL_SIT_VARIANT]);
if(!s->locElements[UCOL_SIT_VARIANT].isEmpty()) {
s->locale.append("_", status);
s->locale.append(s->locElements[UCOL_SIT_VARIANT], status);
}
// if there is a collation keyword, add that too
if(*(s->locElements[UCOL_SIT_KEYWORD])) {
uprv_strcat(s->locale, collationKeyword);
uprv_strcat(s->locale, s->locElements[UCOL_SIT_KEYWORD]);
if(!s->locElements[UCOL_SIT_KEYWORD].isEmpty()) {
s->locale.append(collationKeyword, status);
s->locale.append(s->locElements[UCOL_SIT_KEYWORD], status);
}
// if there is a provider keyword, add that too
if(*(s->locElements[UCOL_SIT_PROVIDER])) {
uprv_strcat(s->locale, providerKeyword);
uprv_strcat(s->locale, s->locElements[UCOL_SIT_PROVIDER]);
if(!s->locElements[UCOL_SIT_PROVIDER].isEmpty()) {
s->locale.append(providerKeyword, status);
s->locale.append(s->locElements[UCOL_SIT_PROVIDER], status);
}
}
}
@ -451,13 +450,12 @@ ucol_prepareShortStringOpen( const char *definition,
// analyse the string in order to get everything we need.
CollatorSpec s;
ucol_sit_initCollatorSpecs(&s);
ucol_sit_readSpecs(&s, definition, parseError, status);
ucol_sit_calculateWholeLocale(&s);
ucol_sit_calculateWholeLocale(&s, *status);
char buffer[internalBufferSize];
uprv_memset(buffer, 0, internalBufferSize);
uloc_canonicalize(s.locale, buffer, internalBufferSize, status);
uloc_canonicalize(s.locale.data(), buffer, internalBufferSize, status);
UResourceBundle *b = ures_open(U_ICUDATA_COLL, buffer, status);
/* we try to find stuff from keyword */
@ -522,13 +520,15 @@ ucol_openFromShortString( const char *definition,
// analyse the string in order to get everything we need.
const char *string = definition;
CollatorSpec s;
ucol_sit_initCollatorSpecs(&s);
string = ucol_sit_readSpecs(&s, definition, parseError, status);
ucol_sit_calculateWholeLocale(&s);
ucol_sit_calculateWholeLocale(&s, *status);
char buffer[internalBufferSize];
uprv_memset(buffer, 0, internalBufferSize);
uloc_canonicalize(s.locale, buffer, internalBufferSize, status);
#ifdef UCOL_TRACE_SIT
fprintf(stderr, "DEF %s, DATA %s, ERR %s\n", definition, s.locale.data(), u_errorName(*status));
#endif
uloc_canonicalize(s.locale.data(), buffer, internalBufferSize, status);
UCollator *result = ucol_open(buffer, status);
int32_t i = 0;
@ -604,7 +604,6 @@ ucol_normalizeShortDefinitionString(const char *definition,
// validate
CollatorSpec s;
ucol_sit_initCollatorSpecs(&s);
ucol_sit_readSpecs(&s, definition, parseError, status);
return ucol_sit_dumpSpecs(&s, destination, capacity, status);
}