scuffed-code/icu4c/source/samples/ugrep/ugrep.cpp

/*************************************************************************
*
*   © 2016 and later: Unicode, Inc. and others.
*   License & terms of use: http://www.unicode.org/copyright.html#License
*
**************************************************************************
**************************************************************************
*
*   Copyright (C) 2002-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
***************************************************************************
*/

//
//   ugrep  - an ICU sample program illustrating the use of ICU Regular Expressions.
//
//            The use of the ICU Regex API all occurs within the main()
//            function.  The rest of the code deals with opening files,
//            encoding conversions, printing results, etc.
//
//            This is not a full-featured grep program.  The command line options
//            have been kept to a minimum to avoid complicating the sample code.
//


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/regex.h"
#include "unicode/ucnv.h"
#include "unicode/uclean.h"

using namespace icu;

//
//  The following variables contain parameters that may be set from the command line.
//
const char *pattern = NULL;     // The regular expression, exactly as given on the command line.
                                //   Set by processOptions(), compiled in main().
int        firstFileNum;        //  argv index of the first file name.  Set by processOptions().
UBool      displayFileName = FALSE;   // TRUE: printMatch() prefixes each line with the file name.
                                      //   processOptions() turns this on when more than one
                                      //   file was named.
UBool      displayLineNum  = FALSE;   // TRUE: printMatch() prefixes each line with its line
                                      //   number (-n or --line-number).


//
//  Info regarding the file currently being processed
//
const char *fileName;             // Name of the file, as passed to readFile().
int         fileLen;              // Length, in UTF-16 Code Units.  readFile() zeroes this on entry
                                  //   and only stores the real length after a successful
                                  //   conversion, so a failed load yields no lines.

UChar      *ucharBuf = 0;         // Buffer, holds converted file.  (Simple minded program, always reads
                                  //   the whole file at once.)  Resized with realloc() by readFile()
                                  //   for each file and freed at the end of main().

char       *charBuf = 0;          // Buffer, for original, unconverted file data.  Managed the
                                  //   same way as ucharBuf.


//
//  Info regarding the line currently being processed
//  (all three are maintained by nextLine())
//
int      lineStart;     // Index of first char of the current line in the file buffer
int      lineEnd;       // Index of char following the new line sequence for the current line
int      lineNum;       // Running line counter for the current file; printed by printMatch()
                        //   when displayLineNum is set.

//
//  Converter, used on output to convert Unicode data back to char *
//             so that it will display in non-Unicode terminal windows.
//             Opened on first use by printMatch(), closed at the end of main().
//
UConverter  *outConverter = 0;

//
//  Function forward declarations
//
void processOptions(int argc, const char **argv);   // parse argv into the option globals above
void nextLine(int start);                           // locate the line beginning at index `start`
void printMatch();                                  // print the current line (lineStart..lineEnd)
void printUsage();                                  // print the command line help text
void readFile(const char *name);                    // load a file into ucharBuf as UTF-16


//------------------------------------------------------------------------------------------
//
//   main          for ugrep
//
//           Structurally, all use of the ICU Regular Expression API is in main(),
//           and all of the supporting stuff necessary to make a running program, but
//           not directly related to regular expressions, is factored out into these other
//           functions.
//
//------------------------------------------------------------------------------------------
int main(int argc, const char** argv) {
    UBool     matchFound = FALSE;

    //
    //  Process the command line options.
    //
    processOptions(argc, argv);

    //
    // Create a RegexPattern object from the user supplied pattern string.
    //
    UErrorCode status = U_ZERO_ERROR;   // All ICU operations report success or failure
                                        //   in a status variable.

    UParseError    parseErr;            // In the event of a syntax error in the regex pattern,
                                        //   this struct will contain the position of the
                                        //   error.

    RegexPattern  *rePat = RegexPattern::compile(pattern, parseErr, status);
                                        // Note that C++ is doing an automatic conversion
                                        //  of the (char *) pattern to a temporary
                                        //  UnicodeString object.
    if (U_FAILURE(status)) {
        fprintf(stderr, "ugrep:  error in pattern: \"%s\" at position %d\n",
            u_errorName(status), parseErr.offset);
        exit(-1);
    }

    //
    // Create a RegexMatcher from the newly created pattern.
    //
    UnicodeString empty;
    RegexMatcher *matcher = rePat->matcher(empty, status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ugrep:  error in creating RegexMatcher: \"%s\"\n",
            u_errorName(status));
        exit(-1);
    }

    //
    // Loop, processing each of the input files.
    //
    for (int fileNum=firstFileNum; fileNum < argc; fileNum++) {
        readFile(argv[fileNum]);

        //
        //  Loop through the lines of a file, trying to match the regex pattern on each.
        //
        for (nextLine(0); lineStart<fileLen; nextLine(lineEnd)) {
            UnicodeString s(FALSE, ucharBuf+lineStart, lineEnd-lineStart);
            matcher->reset(s);
            if (matcher->find()) {
                matchFound = TRUE;
                printMatch();
            }
        }
    }

    //
    //  Clean up
    //
    delete matcher;
    delete rePat;
    free(ucharBuf);
    free(charBuf);
    ucnv_close(outConverter);
    
    u_cleanup();       // shut down ICU, release any cached data it owns.

    return matchFound? 0: 1;
}


//------------------------------------------------------------------------------------------
//
//   doOptions          Run through the command line options, and set
//                      the global variables accordingly.
//
//                      exit without returning if an error occurred and
//                      ugrep should not proceed further.
//
//------------------------------------------------------------------------------------------
void processOptions(int argc, const char **argv) {
    int            optInd;
    UBool          doUsage   = FALSE;
    UBool          doVersion = FALSE;
    const char    *arg;


    for(optInd = 1; optInd < argc; ++optInd) {
        arg = argv[optInd];
        
        /* version info */
        if(strcmp(arg, "-V") == 0 || strcmp(arg, "--version") == 0) {
            doVersion = TRUE;
        }
        /* usage info */
        else if(strcmp(arg, "--help") == 0) {
            doUsage = TRUE;
        }
        else if(strcmp(arg, "-n") == 0 || strcmp(arg, "--line-number") == 0) {
            displayLineNum = TRUE;
        }
        /* POSIX.1 says all arguments after -- are not options */
        else if(strcmp(arg, "--") == 0) {
            /* skip the -- */
            ++optInd;
            break;
        }
        /* unrecognized option */
        else if(strncmp(arg, "-", strlen("-")) == 0) {
            printf("ugrep: invalid option -- %s\n", arg+1);
            doUsage = TRUE;
        }
        /* done with options */
        else {
            break;
        }
    }

    if (doUsage) {
        printUsage();
        exit(0);
    }

    if (doVersion) {
        printf("ugrep version 0.01\n");
        if (optInd == argc) {
            exit(0);
        }
    }

    int  remainingArgs = argc-optInd;     // pattern file ...
    if (remainingArgs < 2) {
        fprintf(stderr, "ugrep:  files or pattern are missing.\n");
        printUsage();
        exit(1);
    }

    if (remainingArgs > 2) {
        // More than one file to be processed.   Display file names with match output.
        displayFileName = TRUE;
    }

    pattern      = argv[optInd];
    firstFileNum = optInd+1;
}

//------------------------------------------------------------------------------------------
//
//   printUsage         Write the command line synopsis and option summary to stdout.
//
//                      This function only prints.  It returns to its caller, which
//                      is responsible for choosing the process exit status; exiting
//                      from here would make a failure status set by the caller
//                      unreachable.
//
//------------------------------------------------------------------------------------------
void printUsage() {
    printf("ugrep [options] pattern file...\n"
        "     -V or --version     display version information\n"
        "     --help              display this help and exit\n"
        "     --                  stop further option processing\n"
        "-n,  --line-number       Prefix each line of output with the line number within its input file.\n"
        );
}

//------------------------------------------------------------------------------------------
//
//    readFile          Read a file into memory, and convert it to Unicode.
//
//                      Since this is just a demo program, take the simple minded approach
//                      of always reading the whole file at once.  No intelligent buffering
//                      is done.
//
//------------------------------------------------------------------------------------------
void readFile(const char *name) {

    //
    //  Initialize global file variables
    //
    fileName = name;
    fileLen  = 0;      // zero length prevents processing in case of errors.


    //
    //  Open the file and determine its size.
    //
    FILE *file = fopen(name, "rb");
    if (file == 0 ) {
        fprintf(stderr, "ugrep: Could not open file \"%s\"\n", fileName);
        return;
    }
    fseek(file, 0, SEEK_END);
    int rawFileLen = ftell(file);
    fseek(file, 0, SEEK_SET);
    

    //
    //   Read in the file
    //
    charBuf    = (char *)realloc(charBuf, rawFileLen+1);   // Need error checking...
    int t = static_cast<int>(fread(charBuf, 1, rawFileLen, file));
    if (t != rawFileLen)  {
        fprintf(stderr, "Error reading file \"%s\"\n", fileName);
        fclose(file);
        return;
    }
    charBuf[rawFileLen]=0;
    fclose(file);

    //
    // Look for a Unicode Signature (BOM) in the data
    //
    int32_t        signatureLength;
    const char *   charDataStart = charBuf;
    UErrorCode     status        = U_ZERO_ERROR;
    const char*    encoding      = ucnv_detectUnicodeSignature(
                           charDataStart, rawFileLen, &signatureLength, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ugrep: ICU Error \"%s\" from ucnv_detectUnicodeSignature()\n",
            u_errorName(status));
        return;
    }
    if(encoding!=NULL ){
        charDataStart  += signatureLength;
        rawFileLen     -= signatureLength;
    }

    //
    // Open a converter to take the file to UTF-16
    //
    UConverter* conv;
    conv = ucnv_open(encoding, &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ugrep: ICU Error \"%s\" from ucnv_open()\n", u_errorName(status));
        return;
    }

    //
    // Convert the file data to UChar.
    //  Preflight first to determine required buffer size.
    //
    uint32_t destCap = ucnv_toUChars(conv,
                       NULL,           //  dest,
                       0,              //  destCapacity,
                       charDataStart,
                       rawFileLen,
                       &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        fprintf(stderr, "ugrep: ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        return;
    };
    
    status = U_ZERO_ERROR;
    ucharBuf = (UChar *)realloc(ucharBuf, (destCap+1) * sizeof(UChar));
    ucnv_toUChars(conv,
        ucharBuf,           //  dest,
        destCap+1,
        charDataStart,
        rawFileLen,
        &status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ugrep: ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
        return;
    };
    ucnv_close(conv);
    
    //
    //  Successful conversion.  Set the global size variables so that
    //     the rest of the processing will proceed for this file.
    //
    fileLen = destCap;
}
    
    
//------------------------------------------------------------------------------------------
//
//   nextLine           Advance the line index variables, starting at the
//                      specified position in the input file buffer, by
//                      scanning forward until the next end-of-line.
//
//                      Need to take into account all of the possible Unicode
//                      line ending sequences.
//
//                      startPos   index into ucharBuf at which the line begins.
//                                 0 means "first line of a file" and restarts
//                                 the line counter.
//
//                      On return lineStart is startPos and lineEnd is the index
//                      just past the line's terminator (a CR LF pair counts as
//                      one terminator), or fileLen if the buffer ended first.
//                      lineNum is 1 for the first line of a file, matching the
//                      numbering used by grep -n.
//
//------------------------------------------------------------------------------------------
void nextLine(int  startPos) {
    // Line numbers are 1-based: a scan from offset 0 is line 1, and each
    // later call moves on to the following line.
    lineNum   = (startPos == 0) ? 1 : lineNum + 1;
    lineStart = lineEnd = startPos;

    while (lineEnd < fileLen) {
        UChar c = ucharBuf[lineEnd];
        lineEnd++;
        if (c == 0x0a   ||       // Line Feed
            c == 0x0c   ||       // Form Feed
            c == 0x0d   ||       // Carriage Return
            c == 0x85   ||       // Next Line
            c == 0x2028 ||       // Line Separator
            c == 0x2029)         // Paragraph separator
        {
            // CR immediately followed by LF is a single line ending;
            // take the LF as part of this line too.
            if (c == 0x0d && lineEnd < fileLen && ucharBuf[lineEnd] == 0x0a) {
                lineEnd++;
            }
            return;
        }
    }
    // Ran off the end of the buffer: the last line has no terminator.
}


//------------------------------------------------------------------------------------------
//
//   printMatch         Called when a matching line has been located.
//                      Print out the line from the file with the match, after
//                         converting it back to the default code page.
//
//------------------------------------------------------------------------------------------
void printMatch() {
    char                buf[2000];
    UErrorCode         status       = U_ZERO_ERROR;

    // If we haven't already created a converter for output, do it now.
    if (outConverter == 0) {
        outConverter = ucnv_open(NULL, &status);
        if (U_FAILURE(status)) {
            fprintf(stderr, "ugrep:  Error opening default converter: \"%s\"\n",
                u_errorName(status));
            exit(-1);
        }
    };

    // Convert the line to be printed back to the default 8 bit code page.
    //   If the line is too long for our buffer, just truncate it.
    ucnv_fromUChars(outConverter,
                    buf,                   // destination buffer for conversion
                    sizeof(buf),           // capacity of destination buffer
                    &ucharBuf[lineStart],   // Input to conversion
                    lineEnd-lineStart,     // number of UChars to convert
                    &status);
    buf[sizeof(buf)-1] = 0;                // Add null for use in case of too long lines.
                                           // The converter null-terminates its output unless
                                           //   the buffer completely fills.
   
    if (displayFileName) {
        printf("%s:", fileName);
    }
    if (displayLineNum) {
        printf("%d:", lineNum);
    }
    printf("%s", buf);
}
ICU-12761 Adds Unicode copyright notice. X-SVN-Rev: 39388 2016-09-28 22:12:27 +00:00			`/*************************************************************************`
			`*`
ICU-12764 icu4c utf-8 source files, update Copyright notices. X-SVN-Rev: 39583 2017-01-20 00:20:31 +00:00			`* © 2016 and later: Unicode, Inc. and others.`
ICU-12761 Adds Unicode copyright notice. X-SVN-Rev: 39388 2016-09-28 22:12:27 +00:00			`* License & terms of use: http://www.unicode.org/copyright.html#License`
			`*`
			`**************************************************************************`
			`**************************************************************************`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00			`*`
ICU-12564 Reverted r38761 and r38762, because we want to prepend the Unicode copyright for existing source files, instead of replacing copyright comments. X-SVN-Rev: 38776 2016-05-31 21:45:07 +00:00			`* Copyright (C) 2002-2010, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00			`*`
			`***************************************************************************`
			`*/`

			`//`
			`// ugrep - an ICU sample program illustrating the use of ICU Regular Expressions.`
			`//`
			`// The use of the ICU Regex API all occurs within the main()`
ICU-13581 Fix typos in Samples, and add .vs folder to SVN ignore list. X-SVN-Rev: 40989 2018-02-27 00:04:14 +00:00			`// function. The rest of the code deals with opening files,`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00			`// encoding conversions, printing results, etc.`
			`//`
			`// This is not a full-featured grep program. The command line options`
			`// have been kept to a minimum to avoid complicating the sample code.`
			`//`



			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <string.h>`

			`#include "unicode/utypes.h"`
			`#include "unicode/ustring.h"`
			`#include "unicode/regex.h"`
			`#include "unicode/ucnv.h"`
			`#include "unicode/uclean.h"`

ICU-13171 Update the VS project files to use VS 2017, plus various fixes to get Samples compiling. Also add explicit defines for WINVER and _WIN32_WINNT for Windows 7 targeting in non-UWP projects. X-SVN-Rev: 40935 2018-02-16 03:16:03 +00:00			`using namespace icu;`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00
			`//`
ICU-13581 Fix typos in Samples, and add .vs folder to SVN ignore list. X-SVN-Rev: 40989 2018-02-27 00:04:14 +00:00			`// The following variables contain parameters that may be set from the command line.`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00			`//`
			`const char *pattern = NULL; // The regular expression`
			`int firstFileNum; // argv index of the first file name`
			`UBool displayFileName = FALSE;`
			`UBool displayLineNum = FALSE;`


			`//`
			`// Info regarding the file currently being processed`
			`//`
			`const char *fileName;`
			`int fileLen; // Length, in UTF-16 Code Units.`

			`UChar *ucharBuf = 0; // Buffer, holds converted file. (Simple minded program, always reads`
			`// the whole file at once.`

			`char *charBuf = 0; // Buffer, for original, unconverted file data.`


			`//`
			`// Info regarding the line currently being processed`
			`//`
			`int lineStart; // Index of first char of the current line in the file buffer`
			`int lineEnd; // Index of char following the new line sequence for the current line`
			`int lineNum;`

			`//`
			`// Converter, used on output to convert Unicode data back to char *`
			`// so that it will display in non-Unicode terminal windows.`
			`//`
			`UConverter *outConverter = 0;`

			`//`
			`// Function forward declarations`
			`//`
			`void processOptions(int argc, const char **argv);`
			`void nextLine(int start);`
			`void printMatch();`
			`void printUsage();`
			`void readFile(const char *name);`



			`//------------------------------------------------------------------------------------------`
			`//`
			`// main for ugrep`
			`//`
			`// Structurally, all use of the ICU Regular Expression API is in main(),`
			`// and all of the supporting stuff necessary to make a running program, but`
			`// not directly related to regular expressions, is factored out into these other`
			`// functions.`
			`//`
			`//------------------------------------------------------------------------------------------`
			`int main(int argc, const char** argv) {`
			`UBool matchFound = FALSE;`

			`//`
ICU-13581 Fix typos in Samples, and add .vs folder to SVN ignore list. X-SVN-Rev: 40989 2018-02-27 00:04:14 +00:00			`// Process the command line options.`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00			`//`
			`processOptions(argc, argv);`

			`//`
			`// Create a RegexPattern object from the user supplied pattern string.`
			`//`
			`UErrorCode status = U_ZERO_ERROR; // All ICU operations report success or failure`
			`// in a status variable.`

			`UParseError parseErr; // In the event of a syntax error in the regex pattern,`
			`// this struct will contain the position of the`
			`// error.`

			`RegexPattern *rePat = RegexPattern::compile(pattern, parseErr, status);`
			`// Note that C++ is doing an automatic conversion`
			`// of the (char *) pattern to a temporary`
			`// UnicodeString object.`
			`if (U_FAILURE(status)) {`
			`fprintf(stderr, "ugrep: error in pattern: \"%s\" at position %d\n",`
			`u_errorName(status), parseErr.offset);`
			`exit(-1);`
			`}`

			`//`
			`// Create a RegexMatcher from the newly created pattern.`
			`//`
			`UnicodeString empty;`
			`RegexMatcher *matcher = rePat->matcher(empty, status);`
			`if (U_FAILURE(status)) {`
			`fprintf(stderr, "ugrep: error in creating RegexMatcher: \"%s\"\n",`
			`u_errorName(status));`
			`exit(-1);`
			`}`

			`//`
			`// Loop, processing each of the input files.`
			`//`
			`for (int fileNum=firstFileNum; fileNum < argc; fileNum++) {`
			`readFile(argv[fileNum]);`

			`//`
			`// Loop through the lines of a file, trying to match the regex pattern on each.`
			`//`
			`for (nextLine(0); lineStart<fileLen; nextLine(lineEnd)) {`
			`UnicodeString s(FALSE, ucharBuf+lineStart, lineEnd-lineStart);`
			`matcher->reset(s);`
			`if (matcher->find()) {`
			`matchFound = TRUE;`
			`printMatch();`
			`}`
			`}`
			`}`

			`//`
			`// Clean up`
			`//`
			`delete matcher;`
			`delete rePat;`
			`free(ucharBuf);`
			`free(charBuf);`
			`ucnv_close(outConverter);`

			`u_cleanup(); // shut down ICU, release any cached data it owns.`

			`return matchFound? 0: 1;`
			`}`



			`//------------------------------------------------------------------------------------------`
			`//`
			`// doOptions Run through the command line options, and set`
			`// the global variables accordingly.`
			`//`
ICU-13581 Fix typos in Samples, and add .vs folder to SVN ignore list. X-SVN-Rev: 40989 2018-02-27 00:04:14 +00:00			`// exit without returning if an error occurred and`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00			`// ugrep should not proceed further.`
			`//`
			`//------------------------------------------------------------------------------------------`
			`void processOptions(int argc, const char **argv) {`
			`int optInd;`
			`UBool doUsage = FALSE;`
			`UBool doVersion = FALSE;`
			`const char *arg;`


			`for(optInd = 1; optInd < argc; ++optInd) {`
			`arg = argv[optInd];`

			`/* version info */`
			`if(strcmp(arg, "-V") == 0 || strcmp(arg, "--version") == 0) {`
			`doVersion = TRUE;`
			`}`
			`/* usage info */`
			`else if(strcmp(arg, "--help") == 0) {`
			`doUsage = TRUE;`
			`}`
ICU-0105 regex sample cleanup & unix makefile X-SVN-Rev: 10598 2002-12-11 01:33:05 +00:00			`else if(strcmp(arg, "-n") == 0 || strcmp(arg, "--line-number") == 0) {`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00			`displayLineNum = TRUE;`
			`}`
			`/* POSIX.1 says all arguments after -- are not options */`
			`else if(strcmp(arg, "--") == 0) {`
			`/* skip the -- */`
			`++optInd;`
			`break;`
			`}`
			`/* unrecognized option */`
			`else if(strncmp(arg, "-", strlen("-")) == 0) {`
			`printf("ugrep: invalid option -- %s\n", arg+1);`
			`doUsage = TRUE;`
			`}`
			`/* done with options */`
			`else {`
			`break;`
			`}`
			`}`

			`if (doUsage) {`
			`printUsage();`
			`exit(0);`
			`}`

			`if (doVersion) {`
ICU-0105 regex sample cleanup & unix makefile X-SVN-Rev: 10598 2002-12-11 01:33:05 +00:00			`printf("ugrep version 0.01\n");`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00			`if (optInd == argc) {`
			`exit(0);`
			`}`
			`}`

			`int remainingArgs = argc-optInd; // pattern file ...`
			`if (remainingArgs < 2) {`
			`fprintf(stderr, "ugrep: files or pattern are missing.\n");`
			`printUsage();`
			`exit(1);`
			`}`

			`if (remainingArgs > 2) {`
			`// More than one file to be processed. Display file names with match output.`
			`displayFileName = TRUE;`
			`}`

			`pattern = argv[optInd];`
			`firstFileNum = optInd+1;`
			`}`

			`//------------------------------------------------------------------------------------------`
			`//`
			`// printUsage`
			`//`
			`//------------------------------------------------------------------------------------------`
			`void printUsage() {`
			`printf("ugrep [options] pattern file...\n"`
			`" -V or --version display version information\n"`
			`" --help display this help and exit\n"`
			`" -- stop further option processing\n"`
			`"-n, --line-number Prefix each line of output with the line number within its input file.\n"`
			`);`
			`exit(0);`
			`}`

			`//------------------------------------------------------------------------------------------`
			`//`
			`// readFile Read a file into memory, and convert it to Unicode.`
			`//`
			`// Since this is just a demo program, take the simple minded approach`
			`// of always reading the whole file at once. No intelligent buffering`
			`// is done.`
			`//`
			`//------------------------------------------------------------------------------------------`
			`void readFile(const char *name) {`

			`//`
			`// Initialize global file variables`
			`//`
			`fileName = name;`
			`fileLen = 0; // zero length prevents processing in case of errors.`


			`//`
			`// Open the file and determine its size.`
			`//`
			`FILE *file = fopen(name, "rb");`
			`if (file == 0 ) {`
			`fprintf(stderr, "ugrep: Could not open file \"%s\"\n", fileName);`
			`return;`
			`}`
			`fseek(file, 0, SEEK_END);`
			`int rawFileLen = ftell(file);`
			`fseek(file, 0, SEEK_SET);`


			`//`
			`// Read in the file`
			`//`
			`charBuf = (char *)realloc(charBuf, rawFileLen+1); // Need error checking...`
ICU-13171 Fix issues with ICU4C Samples, and various issues with vcxproj files. The samples now all build on Win32, except for the layout sample (but the LE is deprecated). X-SVN-Rev: 40951 2018-02-20 10:03:29 +00:00			`int t = static_cast<int>(fread(charBuf, 1, rawFileLen, file));`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00			`if (t != rawFileLen) {`
			`fprintf(stderr, "Error reading file \"%s\"\n", fileName);`
ICU-7311 Replace tab character with spaces in changes from the patch. X-SVN-Rev: 27652 2010-02-24 19:46:03 +00:00			`fclose(file);`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00			`return;`
			`}`
			`charBuf[rawFileLen]=0;`
			`fclose(file);`

			`//`
			`// Look for a Unicode Signature (BOM) in the data`
			`//`
			`int32_t signatureLength;`
			`const char * charDataStart = charBuf;`
			`UErrorCode status = U_ZERO_ERROR;`
			`const char* encoding = ucnv_detectUnicodeSignature(`
			`charDataStart, rawFileLen, &signatureLength, &status);`
			`if (U_FAILURE(status)) {`
			`fprintf(stderr, "ugrep: ICU Error \"%s\" from ucnv_detectUnicodeSignature()\n",`
			`u_errorName(status));`
			`return;`
			`}`
			`if(encoding!=NULL ){`
			`charDataStart += signatureLength;`
			`rawFileLen -= signatureLength;`
			`}`

			`//`
			`// Open a converter to take the file to UTF-16`
			`//`
			`UConverter* conv;`
			`conv = ucnv_open(encoding, &status);`
			`if (U_FAILURE(status)) {`
			`fprintf(stderr, "ugrep: ICU Error \"%s\" from ucnv_open()\n", u_errorName(status));`
			`return;`
			`}`

			`//`
			`// Convert the file data to UChar.`
			`// Preflight first to determine required buffer size.`
			`//`
			`uint32_t destCap = ucnv_toUChars(conv,`
			`NULL, // dest,`
			`0, // destCapacity,`
			`charDataStart,`
			`rawFileLen,`
			`&status);`
			`if (status != U_BUFFER_OVERFLOW_ERROR) {`
			`fprintf(stderr, "ugrep: ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));`
			`return;`
			`};`

			`status = U_ZERO_ERROR;`
			`ucharBuf = (UChar *)realloc(ucharBuf, (destCap+1) * sizeof(UChar));`
			`ucnv_toUChars(conv,`
			`ucharBuf, // dest,`
			`destCap+1,`
			`charDataStart,`
			`rawFileLen,`
			`&status);`
			`if (U_FAILURE(status)) {`
			`fprintf(stderr, "ugrep: ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));`
			`return;`
			`};`
			`ucnv_close(conv);`

			`//`
			`// Successful conversion. Set the global size variables so that`
			`// the rest of the processing will proceed for this file.`
			`//`
			`fileLen = destCap;`
			`}`





			`//------------------------------------------------------------------------------------------`
			`//`
			`// nextLine Advance the line index variables, starting at the`
			`// specified position in the input file buffer, by`
ICU-13581 Fix typos in Samples, and add .vs folder to SVN ignore list. X-SVN-Rev: 40989 2018-02-27 00:04:14 +00:00			`// scanning forward until the next end-of-line.`
ICU-0105 add regular expression sample program X-SVN-Rev: 10587 2002-12-10 22:02:56 +00:00			`//`
			`// Need to take into account all of the possible Unicode`
			`// line ending sequences.`
			`//`
			`//------------------------------------------------------------------------------------------`
			`void nextLine(int startPos) {`
			`if (startPos == 0) {`
			`lineNum = 0;`
			`} else {`
			`lineNum++;`
			`}`
			`lineStart = lineEnd = startPos;`

			`for (;;) {`
			`if (lineEnd >= fileLen) {`
			`return;`
			`}`
			`UChar c = ucharBuf[lineEnd];`
			`lineEnd++;`
			`if (c == 0x0a || // Line Feed`
			`c == 0x0c || // Form Feed`
			`c == 0x0d || // Carriage Return`
			`c == 0x85 || // Next Line`
			`c == 0x2028 || // Line Separator`
			`c == 0x2029) // Paragraph separator`
			`{`
			`break;`
			`}`
			`}`

			`// Check for CR/LF sequence, and advance over the LF if we're in the middle of one.`
			`if (lineEnd < fileLen &&`
			`ucharBuf[lineEnd-1] == 0x0d &&`
			`ucharBuf[lineEnd] == 0x0a)`
			`{`
			`lineEnd++;`
			`}`
			`}`


			`//------------------------------------------------------------------------------------------`
			`//`
			`// printMatch Called when a matching line has been located.`
			`// Print out the line from the file with the match, after`
			`// converting it back to the default code page.`
			`//`
			`//------------------------------------------------------------------------------------------`
			`void printMatch() {`
			`char buf[2000];`
			`UErrorCode status = U_ZERO_ERROR;`

			`// If we haven't already created a converter for output, do it now.`
			`if (outConverter == 0) {`
			`outConverter = ucnv_open(NULL, &status);`
			`if (U_FAILURE(status)) {`
			`fprintf(stderr, "ugrep: Error opening default converter: \"%s\"\n",`
			`u_errorName(status));`
			`exit(-1);`
			`}`
			`};`

			`// Convert the line to be printed back to the default 8 bit code page.`
			`// If the line is too long for our buffer, just truncate it.`
			`ucnv_fromUChars(outConverter,`
			`buf, // destination buffer for conversion`
			`sizeof(buf), // capacity of destination buffer`
			`&ucharBuf[lineStart], // Input to conversion`
			`lineEnd-lineStart, // number of UChars to convert`
			`&status);`
			`buf[sizeof(buf)-1] = 0; // Add null for use in case of too long lines.`
			`// The converter null-terminates its output unless`
			`// the buffer completely fills.`

			`if (displayFileName) {`
			`printf("%s:", fileName);`
			`}`
			`if (displayLineNum) {`
			`printf("%d:", lineNum);`
			`}`
			`printf("%s", buf);`
			`}`