2000-10-23 20:50:58 +00:00
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
|
|
|
* Copyright (C) 2000, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
* file name: gbtoucm.c
|
|
|
|
* encoding: US-ASCII
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2000oct19
|
|
|
|
* created by: Markus W. Scherer
|
|
|
|
*
|
|
|
|
* This tool reads a mapping table in a very simple format and turns it into
|
|
|
|
* .ucm file format.
|
|
|
|
* The input format is as follows:
|
2000-10-30 17:22:57 +00:00
|
|
|
* unicode [':' | '>' | '<'] codepage ['*']
|
2000-10-23 20:50:58 +00:00
|
|
|
* With
|
|
|
|
* unicode = hexadecimal number 0..10ffff
|
|
|
|
* codepage = hexadecimal number 0..ffffffff for big-endian bytes
|
|
|
|
* ':' for roundtrip mappings
|
|
|
|
* '>' for fallbacks from Unicode to codepage
|
2000-10-30 17:22:57 +00:00
|
|
|
* '<' for fallbacks from codepage to Unicode
|
2000-10-23 20:50:58 +00:00
|
|
|
* '*' ignored
|
|
|
|
*
|
|
|
|
* To compile, just call a C compiler/linker with this source file.
|
|
|
|
* On Windows: cl gbtoucm.c
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
/*
 * Entry point: converts a simple mapping table read from stdin into .ucm
 * mapping lines written to stdout.
 *
 * Each input line has the form
 *     unicode [':' | '>' | '<'] codepage ['*']
 * where both numbers are hexadecimal. Empty lines, lines starting with '#'
 * and lines starting with 0x1a (DOS end-of-file marker) are copied to the
 * output unchanged. A line consisting of the word "ranges" ends processing;
 * the rest of the input is ignored.
 *
 * The delimiter selects the .ucm precision flag that is written after '|':
 *     ':' -> 0 (roundtrip)
 *     '>' -> 1 (fallback from Unicode to codepage)
 *     '<' -> 3 (fallback from codepage to Unicode)
 *
 * The command-line arguments are not used.
 *
 * Returns 0 on success, 1 after printing a message to stderr if a line
 * cannot be read or parsed.
 */
extern int
main(int argc, const char *argv[]) {
    char line[200];
    char *end, *bytes;
    unsigned long c, b;
    unsigned int fallback;
    size_t length;
    int shift;

    (void)argc;
    (void)argv;

    /* parse the input file from stdin */
    while(fgets(line, (int)sizeof(line), stdin)!=NULL) {
        /* fgets() keeps the line terminator; remove it like gets() did */
        length=strlen(line);
        if(length>0 && line[length-1]=='\n') {
            line[length-1]=0;
        } else if(length==sizeof(line)-1) {
            /*
             * The buffer is full and no newline was seen: this is only
             * acceptable if the line ends exactly here (newline or end of
             * input follows immediately); otherwise the line is too long.
             */
            int next=getc(stdin);
            if(next!=EOF && next!='\n') {
                fprintf(stderr, "error: input line longer than %lu characters: \"%s\"\n",
                        (unsigned long)(sizeof(line)-1), line);
                return 1;
            }
        }

        /* pass through empty and comment lines */
        if(line[0]==0 || line[0]=='#' || line[0]==0x1a) {
            puts(line);
            continue;
        }

        /* end of code points, beginning of ranges? */
        if(0==strcmp(line, "ranges")) {
            break; /* ignore the rest of the file */
        }

        /* read Unicode code point */
        c=strtoul(line, &end, 16);
        if(end==line) {
            fprintf(stderr, "error: missing code point in \"%s\"\n", line);
            return 1;
        }
        if(c>0x10fffful) {
            fprintf(stderr, "error: code point out of range 0..10ffff in \"%s\"\n", line);
            return 1;
        }

        /* the delimiter determines the .ucm precision flag */
        if(*end==':') {
            fallback=0;
        } else if(*end=='>') {
            fallback=1;
        } else if(*end=='<') {
            fallback=3;
        } else {
            fprintf(stderr, "error: delimiter not one of :>< in \"%s\"\n", line);
            return 1;
        }

        /*
         * read byte sequence as one long value;
         * end==bytes means that strtoul() did not find any digits
         */
        bytes=end+1;
        b=strtoul(bytes, &end, 16);
        if(end==bytes || (*end!=0 && *end!='*')) {
            fprintf(stderr, "error parsing byte sequence from \"%s\"\n", line);
            return 1;
        }
        if(b>0xfffffffful) {
            fprintf(stderr, "error: byte sequence out of range 0..ffffffff in \"%s\"\n", line);
            return 1;
        }

        /*
         * output in .ucm format:
         * the magnitude of the value determines how many bytes (1..4) are
         * written, most significant byte first
         */
        if(b<=0xff) {
            shift=0;
        } else if(b<=0xffff) {
            shift=8;
        } else if(b<=0xffffff) {
            shift=16;
        } else {
            shift=24;
        }

        printf("<U%04lX> ", c);
        for(; shift>=0; shift-=8) {
            /* mask and cast so that the argument matches %X (unsigned int) */
            printf("\\x%02X", (unsigned int)((b>>shift)&0xff));
        }
        printf(" |%u\n", fallback);
    }

    if(ferror(stdin)) {
        fprintf(stderr, "error: failure reading from stdin\n");
        return 1;
    }

    return 0;
}
|