scuffed-code/icu4c/source/tools/genprops/misc/ucdmerge.c

/*
*******************************************************************************
*
*   Copyright (C) 2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucdmerge.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003feb20
*   created by: Markus W. Scherer
*
*   Simple tool for Unicode Character Database files with semicolon-delimited fields.
*   Merges adjacent, identical per-code point data lines into one line with range syntax.
*
*   To compile, just call a C compiler/linker with this source file.
*   On Windows: cl ucdmerge.c
*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Skip leading blanks.
 * Returns a pointer to the first character at or after s that is neither
 * a space nor a horizontal tab; this may be the terminating NUL.
 */
static const char *
skipWhitespace(const char *s) {
    for(;;) {
        switch(*s) {
        case ' ':
        case '\t':
            /* still inside the run of blanks, keep going */
            ++s;
            break;
        default:
            /* anything else, including NUL, ends the run */
            return s;
        }
    }
}

/*
 * Find where the data portion of a line ends.
 * If the line contains a '#', the data ends before that comment marker and
 * before any spaces/tabs that immediately precede it; otherwise the data
 * runs up to the terminating NUL.
 * Returns a pointer to the first character after the data.
 */
static char *
endOfData(const char *l) {
    char *limit=strchr(l, '#');

    if(limit==NULL) {
        /* no comment on this line: the data reaches the end of the string */
        return strchr(l, 0);
    }

    /* back up over the blanks that separate the data from the comment */
    while(limit!=l && (limit[-1]==' ' || limit[-1]=='\t')) {
        --limit;
    }
    return limit;
}

/*
 * Compare the data portions of two semicolon-delimited lines.
 * The data portion starts right after the first ';' and ends where
 * endOfData() says it does (before a '#' comment and the blanks preceding it,
 * or at the end of the string).
 *
 * Returns nonzero if both data portions have the same length and bytes,
 * 0 otherwise. A line without any ';' has no data portion and never matches.
 */
static int
sameData(const char *l1, const char *l2) {
    const char *end1, *end2;
    size_t length;

    /* find the first semicolon in each line */
    l1=strchr(l1, ';');
    l2=strchr(l2, ';');
    if(l1==NULL || l2==NULL) {
        /* avoid pointer arithmetic on NULL when a line has no field separator */
        return 0;
    }
    ++l1;
    ++l2;

    /* find the end of data: end of string or start of comment */
    end1=endOfData(l1);
    end2=endOfData(l2);

    /* compare the line data portions; endOfData() never returns less than its argument */
    length=(size_t)(end1-l1);
    return length==(size_t)(end2-l2) && 0==memcmp(l1, l2, length);
}

/*
 * Read one line from stdin into buffer (at most capacity-1 characters) and
 * remove its trailing newline, if any.
 *
 * Returns  1 if a line was read (a final line without a newline counts),
 *          0 at end of input or on a read error with nothing read,
 *         -1 if the line did not end within the buffer (too long, or it
 *            contains an embedded NUL); the remainder stays unread.
 */
static int
readLine(char *buffer, size_t capacity) {
    size_t length;
    int next;

    if(fgets(buffer, (int)capacity, stdin)==NULL) {
        return 0;
    }

    length=strlen(buffer);
    if(length>0 && buffer[length-1]=='\n') {
        buffer[length-1]=0;
        return 1;
    }

    /* no newline seen: fine only if this was the last line of the input */
    next=getc(stdin);
    if(next==EOF) {
        return 1;
    }
    ungetc(next, stdin);
    return -1;
}

/*
 * Filter from stdin to stdout.
 * Lines that start with a single hexadecimal code point followed by ';' are
 * collected; runs of consecutive code points whose data (see sameData())
 * is identical are written as one "first..last;data" line. If both the first
 * and the last line of a run carry '#' comments, the comments are joined
 * with "..". All other lines are copied through unchanged.
 *
 * Input is read with a bounded fgets()-based reader instead of gets(), which
 * cannot limit how much it writes into the buffer. An over-long input line
 * ends processing: the pending range is still written, a message goes to
 * stderr, and the exit status is EXIT_FAILURE. Otherwise returns 0.
 */
extern int
main(int argc, const char *argv[]) {
    static char line[2000], firstLine[2000], lastLine[2000];
    char *end;
    long first, last, c;
    int finished, failed, status;

    (void)argc;
    (void)argv;

    first=last=-1;
    finished=0;
    failed=0;

    for(;;) {
        status=readLine(line, sizeof(line));
        if(status>0) {
            /* parse the initial code point, if any */
            c=strtol(line, &end, 16);
            if(end!=line && *skipWhitespace(end)==';') {
                /* single code point followed by semicolon and data, keep c */
            } else {
                c=-1;
            }
        } else {
            if(status<0) {
                fprintf(stderr,
                        "ucdmerge: input line too long (limit is %lu characters)\n",
                        (unsigned long)(sizeof(line)-1));
                failed=1;
            }
            /* treat as end of input so that a pending range still gets written */
            line[0]=0;
            c=-1;
            finished=1;
        }

        /* sameData() is only reached when c==last+1, i.e. when line has a ';' */
        if(last>=0 && (c!=(last+1) || !sameData(firstLine, line))) {
            /* output the current range */
            if(first==last) {
                /* there was no range, just output the one line we found */
                puts(firstLine);
            } else {
                /* there was a real range, merge their lines */
                end=strchr(lastLine, '#');
                if(end==NULL) {
                    /* no comment in second line */
                    printf("%04lX..%04lX%s\n",
                            first, last,            /* code point range */
                            strchr(firstLine, ';'));/* first line starting from the first ; */
                } else if(strchr(firstLine, '#')==NULL) {
                    /* no comment in first line */
                    printf("%04lX..%04lX%s%s\n",
                            first, last,            /* code point range */
                            strchr(firstLine, ';'), /* first line starting from the first ; */
                            end);                   /* comment from second line */
                } else {
                    /* merge comments from both lines */
                    printf("%04lX..%04lX%s..%s\n",
                            first, last,            /* code point range */
                            strchr(firstLine, ';'), /* first line starting from the first ; */
                            skipWhitespace(end+1)); /* comment from second line, after # and spaces */
                }
            }
            first=last=-1;
        }

        if(c<0) {
            if(finished) {
                break;
            }

            /* no data on this line, output as is */
            puts(line);
        } else {
            /* data on this line, store for possible range compaction */
            if(last<0) {
                /* set as the first line in a possible range */
                first=last=c;
                strcpy(firstLine, line);
                lastLine[0]=0;
            } else /* must be c==(last+1) && sameData() because of previous conditions */ {
                /* continue with the current range */
                last=c;
                strcpy(lastLine, line);
            }
        }
    }

    return failed ? EXIT_FAILURE : 0;
}
ICU-2427 add UCD tools X-SVN-Rev: 11138 2003-02-21 16:17:42 +00:00			`/*`
			`*******************************************************************************`
			`*`
			`* Copyright (C) 2003, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
			`*`
			`*******************************************************************************`
			`* file name: ucdmerge.c`
			`* encoding: US-ASCII`
			`* tab size: 8 (not used)`
			`* indentation:4`
			`*`
			`* created on: 2003feb20`
			`* created by: Markus W. Scherer`
			`*`
			`* Simple tool for Unicode Character Database files with semicolon-delimited fields.`
			`* Merges adjacent, identical per-code point data lines into one line with range syntax.`
			`*`
			`* To compile, just call a C compiler/linker with this source file.`
			`* On Windows: cl ucdmerge.c`
			`*/`

			`#include <stdio.h>`
			`#include <string.h>`
			`#include <stdlib.h>`

			`static const char *`
			`skipWhitespace(const char *s) {`
			`while(s==' ' \|\| s=='\t') {`
			`++s;`
			`}`
			`return s;`
			`}`

			`/* return the first character position after the end of the data */`
			`static char *`
			`endOfData(const char *l) {`
			`char *end;`
			`char c;`

			`end=strchr(l, '#');`
			`if(end!=NULL) {`
			`/* ignore whitespace before the comment */`
			`while(l!=end && ((c=*(end-1))==' ' \|\| c=='\t')) {`
			`--end;`
			`}`
			`} else {`
			`end=strchr(l, 0);`
			`}`
			`return end;`
			`}`

			`static int`
			`sameData(const char l1, const char l2) {`
			`char end1, end2;`
			`int length;`

			`/* find the first semicolon in each line - there must be one */`
			`l1=strchr(l1, ';')+1;`
			`l2=strchr(l2, ';')+1;`

			`/* find the end of data: end of string or start of comment */`
			`end1=endOfData(l1);`
			`end2=endOfData(l2);`

			`/* compare the line data portions */`
			`length=end1-l1;`
			`return length==(end2-l2) && 0==memcmp(l1, l2, length);`
			`}`

			`extern int`
			`main(int argc, const char *argv[]) {`
			`static char line[2000], firstLine[2000], lastLine[2000];`
			`char *end;`
			`long first, last, c;`
			`int finished;`

			`first=last=-1;`
			`finished=0;`

			`for(;;) {`
			`if(gets(line)!=NULL) {`
			`/* parse the initial code point, if any */`
			`c=strtol(line, &end, 16);`
			`if(end!=line && *skipWhitespace(end)==';') {`
			`/* single code point followed by semicolon and data, keep c */`
			`} else {`
			`c=-1;`
			`}`
			`} else {`
			`line[0]=0;`
			`c=-1;`
			`finished=1;`
			`}`

			`if(last>=0 && (c!=(last+1) \|\| !sameData(firstLine, line))) {`
			`/* output the current range */`
			`if(first==last) {`
			`/* there was no range, just output the one line we found */`
			`puts(firstLine);`
			`} else {`
			`/* there was a real range, merge their lines */`
			`end=strchr(lastLine, '#');`
			`if(end==NULL) {`
			`/* no comment in second line */`
			`printf("%04lX..%04lX%s\n",`
			`first, last, /* code point range */`
			`strchr(firstLine, ';'));/* first line starting from the first ; */`
			`} else if(strchr(firstLine, '#')==NULL) {`
			`/* no comment in first line */`
			`printf("%04lX..%04lX%s%s\n",`
			`first, last, /* code point range */`
			`strchr(firstLine, ';'), /* first line starting from the first ; */`
			`end); /* comment from second line */`
			`} else {`
			`/* merge comments from both lines */`
			`printf("%04lX..%04lX%s..%s\n",`
			`first, last, /* code point range */`
			`strchr(firstLine, ';'), /* first line starting from the first ; */`
			`skipWhitespace(end+1)); /* comment from second line, after # and spaces */`
			`}`
			`}`
			`first=last=-1;`
			`}`

			`if(c<0) {`
			`if(finished) {`
			`break;`
			`}`

			`/* no data on this line, output as is */`
			`puts(line);`
			`} else {`
			`/* data on this line, store for possible range compaction */`
			`if(last<0) {`
			`/* set as the first line in a possible range */`
			`first=last=c;`
			`strcpy(firstLine, line);`
			`lastLine[0]=0;`
			`} else /* must be c==(last+1) && sameData() because of previous conditions */ {`
			`/* continue with the current range */`
			`last=c;`
			`strcpy(lastLine, line);`
			`}`
			`}`
			`}`

			`return 0;`
			`}`