/* ******************************************************************************* * * Copyright (C) 2000, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: gbtoucm.c * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2000oct19 * created by: Markus W. Scherer * * This tool reads a mapping table in a very simple format and turns it into * .ucm file format. * The input format is as follows: * unicode [':' | '>' | '<'] codepage ['*'] * With * unicode = hexadecimal number 0..10ffff * codepage = hexadecimal number 0..ffffffff for big-endian bytes * ':' for roundtrip mappings * '>' for fallbacks from Unicode to codepage * '<' for fallbacks from codepage to Unicode * '*' ignored * * To compile, just call a C compiler/linker with this source file. * On Windows: cl gbtoucm.c */ #include #include #include extern int main(int argc, const char *argv[]) { char line[200]; char *end; unsigned long c, b; unsigned char fallback; /* parse the input file from stdin */ while(gets(line)!=NULL) { /* pass through empty and comment lines */ if(line[0]==0 || line[0]=='#' || line[0]==0x1a) { puts(line); continue; } /* end of code points, beginning of ranges? */ if(0==strcmp(line, "ranges")) { break; /* ignore the rest of the file */ } /* read Unicode code point */ c=strtoul(line, &end, 16); if(end==line) { fprintf(stderr, "error: missing code point in \"%s\"\n", line); return 1; } if(*end==':') { fallback=0; } else if(*end=='>') { fallback=1; } else if(*end=='<') { fallback=3; } else { fprintf(stderr, "error: delimiter not one of :>< in \"%s\"\n", line); return 1; } /* read byte sequence as one long value */ b=strtoul(end+1, &end, 16); if(*end!=0 && *end!='*') { fprintf(stderr, "error parsing byte sequence from \"%s\"\n", line); return 1; } /* output in .ucm format */ if(b<=0xff) { printf(" \\x%02X |%u\n", c, b, fallback); } else if(b<=0xffff) { printf(" \\x%02X\\x%02X |%u\n", c, b>>8, b&0xff, fallback); } else if(b<=0xffffff) { printf(" \\x%02X\\x%02X\\x%02X |%u\n", c, b>>16, (b>>8)&0xff, b&0xff, fallback); } else { printf(" \\x%02X\\x%02X\\x%02X\\x%02X |%u\n", c, b>>24, (b>>16)&0xff, (b>>8)&0xff, b&0xff, fallback); } } return 0; }