/* ********************************************************************** * Copyright (C) 1999, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description * 12/05/99 aliu Creation. ********************************************************************** */ #include #include #include "unicode/utypes.h" #include "cmemory.h" #include "cstring.h" #include "filestrm.h" /********************************************************************* mkhelper exists to enable ICU to use some of the same build files that are used on UNIX. It was originally written to support the building of the numerous ICU converters on Windows, but it is general enough to be useful in other contexts. mkhelper is useful only on Windows; on UNIX you would accomplish the same task with a ten line perl script. mkhelper reads one or more input files and parses them looking for one or more makefile variable name definitions. For example, a typical input file looks like this: # A list of UCM's to build UCM_SOURCE = ibm-1038.ucm ibm-1047.ucm ibm-1089.ucm ibm-1123.ucm \ ibm-1140.ucm ibm-1141.ucm ibm-1142.ucm ibm-1143.ucm ibm-1144.ucm For this file, with no string-modifying options specified, mkhelper would emit the following to stdout: ibm-1038.ucm ibm-1047.ucm ... ibm-1144.ucm The input file can contain comments, blank lines, and variable definitions. Lines are continued if they end with '\'. The variable definitions assign a variable name (UCM_SOURCE in the above example) to a list of zero or more strings. mkhelper reads one or more files of this format. Its parsing is very rudimentary, so input files cannot contain anything other than the elements described above. It looks for variable names specified on the command line, and records their definitions. It then does optional string substitution on those definition strings. 
Finally it emits them to stdout, one per line, with optional prefix and/or suffix strings. If multiple files are read, definitions in later files (on the command line) replace definitions with the same name in earlier files. Because of problems with the Windows shell, it's difficult to use environment variables of the form %VAR% on the command line. To get around this, mkhelper supports the alternative syntax $VAR$ in the prefix, suffix, and string substitution options. Use "$$" to indicate the literal character '$'. If a substring of the form $VAR$ is seen in one of these command line parameters, mkhelper interprets it as an environment variable and substitutes its definition. Alan Liu 12/5/99 ****************************************************/ /* GLOBALS THAT ENCODE THE COMMAND-LINE ARGS */ const char* PREFIX = NULL; /* [-p] Prepend to each line */ const char* SUFFIX = NULL; /* [-s] Append to each line */ const char* OLD = NULL; /* [-old] Old pattern */ const char* NEW = NULL; /* [-new] New pattern */ #define MAX_VARS 8 int VARS_COUNT = 0; const char* VARS[MAX_VARS]; /* [-v] Variables to look for */ #define MAX_FILES 8 int FILES_COUNT = 0; const char* FILES[MAX_FILES]; /* Input files*/ /* GLOBALS THAT STORE THE VAR DEFS FROM THE FILES */ int VARS_DEF_COUNT[MAX_VARS]; /* Number of names in this var's def */ int VARS_DEF_FILE[MAX_VARS]; /* The index of the last file to define this var. This is used to let files listed later on the command line override files listed earlier. We only use the last file's definition. -1 if def not seen in any file. 
*/ char** VARS_DEF[MAX_VARS]; /* Actual definitions */ int VARS_DEF_I[MAX_VARS]; /* Index var used by readVarDefs */ UBool VERBOSE = FALSE; /* CONSTANTS */ const char COMMENT = '#'; const char CR = ((char)13); const char LF = ((char)10); const char MINUS = '-'; const char SPACE = ' '; const char TAB = ((char)9); const char NUL = ((char)0); const char CONTINUE = '\\'; /* GLOBAL INPUT BUFFER */ #define BUFLEN 2048 char BUFFER[BUFLEN]; int32_t LINE_NO; /* processFile return code and callback */ enum FileStat { FS_ERR, FS_VARS_SEEN, FS_NO_VARS }; typedef void (*TokenHandler)(char* token, int32_t len, int fileNo, int varNo); /* PROTOTYPES */ int main(int argc, char *argv[]); void substitute(int varNo); void parseCommandLine(int argc, char *argv[]); void usage(const char* argv0); void countVarDefs(char* token, int32_t len, int fileNo, int varNo); void readVarDefs(char* token, int32_t len, int fileNo, int varNo); enum FileStat processFile(FileStream*, TokenHandler, int fileNo); void processLine(int fileNo, int varNo, char* line, UBool isFirstLine, TokenHandler proc); char* substituteEnvironmentVars(char*); char* skipWhiteSpace(char* p); int32_t readLine(FileStream*); void assert(int); /* use of UBool as arg causes warnings */ void memassert(void* a); /** * Emit usage and exit. */ void usage(const char* argv0) { fprintf(stderr, "\n" "Usage: mkhelper [options] infile...\n" " -p prefix Prepend the given string to each line\n" " -s suffix Append the given string to each line\n" " -old str String to replace with -new str in text. Comparison\n" " is case-sensitive. ONLY ONE REPLACEMENT MADE PER STRING.\n" " -new str String to replace -old str in text\n" " -n var... One or more variable names to parse\n" " -v Be verbose\n" " infile... One or more input files. 
Files listed earlier are\n" " overridden by files listed later.\n" "\n" "Read one or more simple UNIX-style makefile fragments, parse one or\n" "more variable definitions that define lists of strings, and emit those\n" "strings to stdout, one per line, after performing optional string\n" "manipulations. Definitions in later files replace those in earlier\n" "files. Use $VAR$ for environment vars. See source code for details.\n" , argv0); exit(1); } int main(int argc, char *argv[]) { int i, j; UBool err=FALSE; parseCommandLine(argc, argv); for (i=0; inew string pattern substitution on the given variable def. * We just do one replacement, but it would be easy to extend this to * to more. (Just feeling lazy right now.) */ void substitute(int varNo) { int i; int32_t oldLen = uprv_strlen(OLD); int32_t newLen = uprv_strlen(NEW); assert(oldLen > 0 && newLen > 0); for (i=0; i 0); (*proc)(line, len, fileNo, varNo); line = p; } } /** * Make a pass through a file, looking for variable def lines that * match our desired var, and handing them off to processLine. */ enum FileStat processFile(FileStream* in, TokenHandler proc, int fileNo) { int32_t len; char* p; int j; UBool varsSeen = FALSE; LINE_NO = 0; while ((len = readLine(in)) >= 0) { if (len) { UBool varFound = FALSE; /* Skip white space */ p = skipWhiteSpace(BUFFER); /* Is the next word a var? 
*/ for (j=0; j= argc) { fprintf(stderr, "%s must be followed by something\n", arg); usage(argv[0]); } break; } switch (arg[1]) { case 'p': if (PREFIX) { fprintf(stderr, "Multiple -p options not allowed!\n", arg); usage(argv[0]); } PREFIX = substituteEnvironmentVars(argv[++i]); break; case 's': if (SUFFIX) { fprintf(stderr, "Multiple -s options not allowed!\n", arg); usage(argv[0]); } SUFFIX = substituteEnvironmentVars(argv[++i]); break; case 'v': VERBOSE = TRUE; break; case 'o': if (OLD) { fprintf(stderr, "Multiple -old options not allowed!\n", arg); usage(argv[0]); } OLD = substituteEnvironmentVars(argv[++i]); break; case 'n': if (arg[2] == 'e') { if (NEW) { fprintf(stderr, "Multiple -new options not allowed!\n", arg); usage(argv[0]); } NEW = substituteEnvironmentVars(argv[++i]); } else { if (VARS_COUNT == MAX_VARS) { fprintf(stderr, "Too many -n options -- fix tool and recompile!\n", arg); usage(argv[0]); } VARS[VARS_COUNT++] = argv[++i]; } break; default: fprintf(stderr, "Bad option %s\n", arg); usage(argv[0]); break; } } else { if (FILES_COUNT == MAX_FILES) { fprintf(stderr, "Too many input files -- fix tool and recompile!\n", arg); usage(argv[0]); } FILES[FILES_COUNT++] = arg; } } /* Make sure at least one input file and one variable exist */ if (FILES_COUNT < 1 || VARS_COUNT < 1) { fprintf(stderr, "Please specify at least one variable and one input file\n"); usage(argv[0]); } /* Need both old & new or neither */ if ((OLD != NULL) != (NEW != NULL)) { fprintf(stderr, "Specify both -old and -new, or neither\n"); usage(argv[0]); } if (VERBOSE) { if (OLD) { fprintf(stderr, "mkhelper: Substituting \"%s\" -> \"%s\"\n", OLD, NEW); } if (PREFIX) { fprintf(stderr, "mkhelper: Prefix \"%s\"\n", PREFIX); } if (SUFFIX) { fprintf(stderr, "mkhelper: Suffix \"%s\"\n", SUFFIX); } } } /** * Read one line into BUFFER, trim any comment, remove trailing white space * and line separators, bump the LINE_NO, return length. Return negative * value when EOF reached. 
*/
int32_t readLine(FileStream* in) {
    char* hash; /* position of the comment character, if any */
    char* end;  /* one past the last character that is kept */

    ++LINE_NO;
    if (T_FileStream_readLine(in, BUFFER, BUFLEN) == NULL) {
        return -1; /* EOF */
    }

    /* Cut the line at the first comment character, if there is one. */
    hash = uprv_strchr(BUFFER, COMMENT);
    if (hash != NULL) {
        *hash = NUL;
    }

    /* Strip trailing line separators (^J, ^M) AND trailing blanks in a
     * single pass.  Previously blanks were only removed when they
     * preceded a comment, so a line without a comment kept its trailing
     * spaces/tabs, contradicting the documented contract and making a
     * trailing '\' followed by blanks behave differently depending on
     * whether a comment followed it.
     */
    end = BUFFER + uprv_strlen(BUFFER);
    while (end > BUFFER &&
           (end[-1] == CR || end[-1] == LF ||
            end[-1] == SPACE || end[-1] == TAB)) {
        --end;
    }
    *end = NUL;

    /* The length is already known; no need to rescan the buffer. */
    return (int32_t)(end - BUFFER);
}

/**
 * Abort the program if the given condition is false.  Prints a fixed
 * diagnostic to stderr and exits with status 1; returns normally
 * otherwise.
 *
 * @param a  condition to check; zero means failure
 */
void assert(int a) {
    if (!a) {
        fprintf(stderr, "Error(mkhelper): assertion failure\n");
        exit(1);
    }
}

/**
 * Abort the program if the given pointer is null.  Prints an
 * out-of-memory diagnostic to stderr and exits with status 1; returns
 * normally otherwise.
 *
 * @param a  pointer to check
 */
void memassert(void* a) {
    if (!a) {
        fprintf(stderr, "Error(mkhelper): out of memory\n");
        exit(1);
    }
}