ICU-2427 add UCD tools
X-SVN-Rev: 11138
This commit is contained in:
parent
513a23fdae
commit
ed742654b3
149
icu4c/source/tools/genprops/misc/ucdmerge.c
Normal file
149
icu4c/source/tools/genprops/misc/ucdmerge.c
Normal file
@ -0,0 +1,149 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: ucdmerge.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb20
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Simple tool for Unicode Character Database files with semicolon-delimited fields.
|
||||
* Merges adjacent, identical per-code point data lines into one line with range syntax.
|
||||
*
|
||||
* To compile, just call a C compiler/linker with this source file.
|
||||
* On Windows: cl ucdmerge.c
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * Skip leading blanks.
 *
 * s: NUL-terminated string.
 * Returns a pointer into s at the first character that is neither a space
 * nor a horizontal tab; this is the terminating NUL if s consists only of
 * such characters.
 */
static const char *
skipWhitespace(const char *s) {
    /* strspn() counts the length of the initial run of spaces and tabs */
    return s+strspn(s, " \t");
}
|
||||
|
||||
/*
 * Locate the end of the data portion of a line.
 *
 * l: NUL-terminated line (or line tail).
 * Returns the position just past the data: if the line contains a '#',
 * that is the '#' itself moved back over any spaces/tabs that immediately
 * precede it (never before l); otherwise it is the terminating NUL.
 */
static char *
endOfData(const char *l) {
    char *limit=strchr(l, '#');

    if(limit==NULL) {
        /* no comment: the data runs to the end of the string */
        return strchr(l, 0);
    }

    /* back up over whitespace that separates the data from the comment */
    for(; limit!=l && (limit[-1]==' ' || limit[-1]=='\t'); --limit) {
    }
    return limit;
}
|
||||
|
||||
/*
 * Compare the data portions of two lines.
 *
 * The data portion of a line starts right after its first semicolon and
 * ends where endOfData() says it ends (before a trailing comment and the
 * whitespace in front of it, or at the end of the string).
 *
 * l1, l2: NUL-terminated lines.
 * Returns non-zero if both lines contain a semicolon and their data
 * portions have the same length and the same bytes; returns 0 otherwise,
 * including when either line has no semicolon at all.
 */
static int
sameData(const char *l1, const char *l2) {
    const char *data1, *data2;
    const char *end1, *end2;
    size_t length1, length2;

    /* find the first semicolon in each line */
    data1=strchr(l1, ';');
    data2=strchr(l2, ';');
    if(data1==NULL || data2==NULL) {
        /* without a semicolon there is no data field to compare */
        return 0;
    }
    ++data1;
    ++data2;

    /* find the end of data: end of string or start of comment */
    end1=endOfData(data1);
    end2=endOfData(data2);

    /* compare the line data portions; endOfData() never returns a position before its argument */
    length1=(size_t)(end1-data1);
    length2=(size_t)(end2-data2);
    return length1==length2 && 0==memcmp(data1, data2, length1);
}
|
||||
|
||||
extern int
|
||||
main(int argc, const char *argv[]) {
|
||||
static char line[2000], firstLine[2000], lastLine[2000];
|
||||
char *end;
|
||||
long first, last, c;
|
||||
int finished;
|
||||
|
||||
first=last=-1;
|
||||
finished=0;
|
||||
|
||||
for(;;) {
|
||||
if(gets(line)!=NULL) {
|
||||
/* parse the initial code point, if any */
|
||||
c=strtol(line, &end, 16);
|
||||
if(end!=line && *skipWhitespace(end)==';') {
|
||||
/* single code point followed by semicolon and data, keep c */
|
||||
} else {
|
||||
c=-1;
|
||||
}
|
||||
} else {
|
||||
line[0]=0;
|
||||
c=-1;
|
||||
finished=1;
|
||||
}
|
||||
|
||||
if(last>=0 && (c!=(last+1) || !sameData(firstLine, line))) {
|
||||
/* output the current range */
|
||||
if(first==last) {
|
||||
/* there was no range, just output the one line we found */
|
||||
puts(firstLine);
|
||||
} else {
|
||||
/* there was a real range, merge their lines */
|
||||
end=strchr(lastLine, '#');
|
||||
if(end==NULL) {
|
||||
/* no comment in second line */
|
||||
printf("%04lX..%04lX%s\n",
|
||||
first, last, /* code point range */
|
||||
strchr(firstLine, ';'));/* first line starting from the first ; */
|
||||
} else if(strchr(firstLine, '#')==NULL) {
|
||||
/* no comment in first line */
|
||||
printf("%04lX..%04lX%s%s\n",
|
||||
first, last, /* code point range */
|
||||
strchr(firstLine, ';'), /* first line starting from the first ; */
|
||||
end); /* comment from second line */
|
||||
} else {
|
||||
/* merge comments from both lines */
|
||||
printf("%04lX..%04lX%s..%s\n",
|
||||
first, last, /* code point range */
|
||||
strchr(firstLine, ';'), /* first line starting from the first ; */
|
||||
skipWhitespace(end+1)); /* comment from second line, after # and spaces */
|
||||
}
|
||||
}
|
||||
first=last=-1;
|
||||
}
|
||||
|
||||
if(c<0) {
|
||||
if(finished) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* no data on this line, output as is */
|
||||
puts(line);
|
||||
} else {
|
||||
/* data on this line, store for possible range compaction */
|
||||
if(last<0) {
|
||||
/* set as the first line in a possible range */
|
||||
first=last=c;
|
||||
strcpy(firstLine, line);
|
||||
lastLine[0]=0;
|
||||
} else /* must be c==(last+1) && sameData() because of previous conditions */ {
|
||||
/* continue with the current range */
|
||||
last=c;
|
||||
strcpy(lastLine, line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
59
icu4c/source/tools/genprops/misc/ucdstrip.c
Normal file
59
icu4c/source/tools/genprops/misc/ucdstrip.c
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: ucdstrip.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2003feb20
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Simple tool for Unicode Character Database files with semicolon-delimited fields.
|
||||
* Removes comments behind data lines but not in others.
|
||||
*
|
||||
* To compile, just call a C compiler/linker with this source file.
|
||||
* On Windows: cl ucdstrip.c
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
 * Find where the data on a line stops.
 *
 * l: NUL-terminated line.
 * Returns a pointer to the first character after the data: the first '#'
 * in the line, stepped back over directly preceding spaces and tabs (but
 * not past the start of l), or the terminating NUL if there is no '#'.
 */
static char *
endOfData(const char *l) {
    char *stop=strchr(l, '#');

    if(stop==NULL) {
        /* the whole line is data */
        return strchr(l, 0);
    }

    /* do not count whitespace in front of the comment as data */
    while(stop!=l) {
        char before=stop[-1];
        if(before!=' ' && before!='\t') {
            break;
        }
        --stop;
    }
    return stop;
}
|
||||
|
||||
extern int
|
||||
main(int argc, const char *argv[]) {
|
||||
static char line[2000];
|
||||
char *end;
|
||||
|
||||
while(gets(line)!=NULL) {
|
||||
if(strtol(line, &end, 16)>=0 && end!=line) {
|
||||
/* code point or range followed by semicolon and data, remove comment */
|
||||
*endOfData(line)=0;
|
||||
}
|
||||
puts(line);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user