2000-10-26 15:59:34 +00:00
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
*
|
|
|
|
* Copyright (C) 2000, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*
|
|
|
|
*******************************************************************************
|
|
|
|
* file name: gbsingle.c
|
|
|
|
* encoding: US-ASCII
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* created on: 2000oct26
|
|
|
|
* created by: Markus W. Scherer
|
|
|
|
*
|
|
|
|
* This tool reads a mapping table in a very simple format with combined syntax
|
|
|
|
* for mappings from Unicode to GB 18030 and back and turns it into
|
|
|
|
* a single-direction file with only either mapping direction.
|
|
|
|
* The input format is as follows:
|
2000-10-30 17:22:57 +00:00
|
|
|
* unicode [':' | '>' | '<'] codepage ['*']
|
2000-10-26 15:59:34 +00:00
|
|
|
* With
|
|
|
|
* unicode = hexadecimal number 0..10ffff
|
|
|
|
* codepage = hexadecimal number 0..ffffffff for big-endian bytes
|
|
|
|
* ':' for roundtrip mappings
|
|
|
|
* '>' for fallbacks from Unicode to codepage
|
2000-10-30 17:22:57 +00:00
|
|
|
* '<' for fallbacks from codepage to Unicode
|
2000-10-26 15:59:34 +00:00
|
|
|
* '*' ignored
|
|
|
|
*
|
|
|
|
* The output format is as follows:
|
|
|
|
* With no command line argument:
|
|
|
|
* unicode ':' codepage
|
|
|
|
* With a "gb" command line argument:
|
|
|
|
* codepage ':' unicode
|
|
|
|
*
|
|
|
|
* To compile, just call a C compiler/linker with this source file.
|
|
|
|
* On Windows: cl gbsingle.c
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
extern int
|
|
|
|
main(int argc, const char *argv[]) {
|
|
|
|
char line[200];
|
|
|
|
char *end;
|
|
|
|
unsigned long c, b;
|
2000-10-30 17:22:57 +00:00
|
|
|
signed char dir;
|
2000-10-26 15:59:34 +00:00
|
|
|
char uniToGB;
|
|
|
|
|
|
|
|
if(argc<=1) {
|
|
|
|
puts("# Unicode:GB 18030");
|
|
|
|
uniToGB=1;
|
|
|
|
} else if(0==strcmp(argv[1], "gb")) {
|
|
|
|
puts("# GB 18030:Unicode");
|
|
|
|
uniToGB=0;
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, "unknown argument %s\n", argv[1]);
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* parse the input file from stdin */
|
|
|
|
while(gets(line)!=NULL) {
|
|
|
|
/* pass through empty and comment lines */
|
|
|
|
if(line[0]==0 || line[0]=='#' || line[0]==0x1a) {
|
|
|
|
puts(line);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* end of code points, beginning of ranges? */
|
|
|
|
if(0==strcmp(line, "ranges")) {
|
|
|
|
break; /* ignore the rest of the file */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* read Unicode code point */
|
|
|
|
c=strtoul(line, &end, 16);
|
2000-10-30 17:22:57 +00:00
|
|
|
if(end==line) {
|
|
|
|
fprintf(stderr, "error: missing code point in \"%s\"\n", line);
|
2000-10-26 15:59:34 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if(*end==':') {
|
2000-10-30 17:22:57 +00:00
|
|
|
dir=0;
|
|
|
|
} else if(*end=='>') {
|
|
|
|
dir=1;
|
|
|
|
} else if(*end=='<') {
|
|
|
|
dir=-1;
|
2000-10-26 15:59:34 +00:00
|
|
|
} else {
|
2000-10-30 17:22:57 +00:00
|
|
|
fprintf(stderr, "error: delimiter not one of :>< in \"%s\"\n", line);
|
|
|
|
return 1;
|
2000-10-26 15:59:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* read byte sequence as one long value */
|
|
|
|
b=strtoul(end+1, &end, 16);
|
|
|
|
if(*end!=0 && *end!='*') {
|
|
|
|
fprintf(stderr, "error parsing byte sequence from \"%s\"\n", line);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(uniToGB) {
|
2000-10-30 17:22:57 +00:00
|
|
|
/* output Unicode:GB 18030 including fallbacks from Unicode to codepage */
|
|
|
|
if(dir>=0) {
|
2000-11-30 22:07:12 +00:00
|
|
|
printf("%04lX:%02lX\n", c, b);
|
2000-10-30 17:22:57 +00:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* output Unicode:GB 18030 including fallbacks from codepage to Unicode */
|
|
|
|
if(dir<=0) {
|
2000-11-30 22:07:12 +00:00
|
|
|
printf("%02lX:%04lX\n", b, c);
|
2000-10-30 17:22:57 +00:00
|
|
|
}
|
2000-10-26 15:59:34 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|