/* ******************************************************************************* * * Copyright (C) 2000, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: gbsingle.c * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2000oct26 * created by: Markus W. Scherer * * This tool reads a mapping table in a very simple format with combined syntax * for mappings from Unicode to GB 18030 and back and turns it into * a single-direction file with only either mapping direction. * The input format is as follows: * unicode [':' | '>' | '<'] codepage ['*'] * With * unicode = hexadecimal number 0..10ffff * codepage = hexadecimal number 0..ffffffff for big-endian bytes * ':' for roundtrip mappings * '>' for fallbacks from Unicode to codepage * '<' for fallbacks from codepage to Unicode * '*' ignored * * The output format is as follows: * With no command line argument: * unicode ':' codepage * With a "gb" command line argument: * codepage ':' unicode * * To compile, just call a C compiler/linker with this source file. * On Windows: cl gbsingle.c */ #include #include #include extern int main(int argc, const char *argv[]) { char line[200]; char *end; unsigned long c, b; signed char dir; char uniToGB; if(argc<=1) { puts("# Unicode:GB 18030"); uniToGB=1; } else if(0==strcmp(argv[1], "gb")) { puts("# GB 18030:Unicode"); uniToGB=0; } else { fprintf(stderr, "unknown argument %s\n", argv[1]); return 2; } /* parse the input file from stdin */ while(gets(line)!=NULL) { /* pass through empty and comment lines */ if(line[0]==0 || line[0]=='#' || line[0]==0x1a) { puts(line); continue; } /* end of code points, beginning of ranges? */ if(0==strcmp(line, "ranges")) { break; /* ignore the rest of the file */ } /* read Unicode code point */ c=strtoul(line, &end, 16); if(end==line) { fprintf(stderr, "error: missing code point in \"%s\"\n", line); return 1; } if(*end==':') { dir=0; } else if(*end=='>') { dir=1; } else if(*end=='<') { dir=-1; } else { fprintf(stderr, "error: delimiter not one of :>< in \"%s\"\n", line); return 1; } /* read byte sequence as one long value */ b=strtoul(end+1, &end, 16); if(*end!=0 && *end!='*') { fprintf(stderr, "error parsing byte sequence from \"%s\"\n", line); return 1; } if(uniToGB) { /* output Unicode:GB 18030 including fallbacks from Unicode to codepage */ if(dir>=0) { printf("%04lx:%02lx\n", c, b); } } else { /* output Unicode:GB 18030 including fallbacks from codepage to Unicode */ if(dir<=0) { printf("%02lx:%04lx\n", b, c); } } } return 0; }