scuffed-code/icu4c/source/tools/makeconv/misc/rptp2ucm.c

943 lines
29 KiB
C
Raw Normal View History

/*
*******************************************************************************
*
* Copyright (C) 2000-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: rptp2ucm.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2001feb16
* created by: Markus W. Scherer
*
* This tool reads two CDRA conversion table files (RPMAP & TPMAP or RXMAP and TXMAP) and
* generates a canonicalized ICU .ucm file from them.
* If the RPMAP/RXMAP file does not contain a comment line with the substitution character,
* then this tool also attempts to read the header of the corresponding UPMAP/UXMAP file
* to extract subchar and subchar1.
*
* R*MAP: Unicode->codepage
* T*MAP: codepage->Unicode
*
* To compile, just call a C compiler/linker with this source file.
* On Windows: cl rptp2ucm.c
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* One entry of the built-in substitution character list; getSubchar() matches on name. */
typedef struct UCMSubchar {
    const char *name;       /* lookup key, compared with strcmp() */
    unsigned long subchar;  /* value stored into the global subchar on a match */
    unsigned long subchar1; /* value stored into the global subchar1 on a match */
} UCMSubchar;

/* Substitution characters that getSubchar() can supply by table name. */
static const UCMSubchar
knownSubchars[]={
    { "274_P100",  0x3f, 0 },
    { "850_P100",  0x7f, 0 },
    { "913_P100",  0x1a, 0 },
    { "1047_P100", 0x3f, 0 }
};
/* Associates a CCSID with the <icu:state> text that writeUCM() copies into the .ucm header. */
typedef struct CCSIDStateTable {
    unsigned int ccsid;     /* compared with the global ccsid in getStateTable() */
    const char *table;      /* one or more "<icu:state> ...\n" lines */
} CCSIDStateTable;

/* Year when the ucm files were produced using this tool; appended to the output filename */
#define YEAR "2000"

/* state table text shared by CCSIDs 927, 926, 928 and 941 below */
#define japanesePCDBCSStates "<icu:state> 0-ff:2, 81-9f:1, a0-fc:1\n"\
                             "<icu:state> 40-7e, 80-fc\n"\
                             "<icu:state>\n"

/*
 * Hand-written state tables, looked up by CCSID in getStateTable().
 * Every table ends with a newline; writeUCM() then adds one more "\n" so that
 * a blank line follows the state table in the output.
 */
static const CCSIDStateTable
knownStateTables[]={
    { 301,   "<icu:state> 0-ff:2, 81-9f:1, e0-fc:1\n"
             "<icu:state> 40-7e, 80-fc\n"
             "<icu:state>\n" },

    { 367,   "<icu:state> 0-7f\n" },

    { 927,   japanesePCDBCSStates },

    { 926,   japanesePCDBCSStates },

    { 928,   japanesePCDBCSStates },

    { 932,   "<icu:state> 0-7f,80,81-9f:1,a0-df,fd-ff, e0-fc:1\n"
             "<icu:state> 40-7e, 80-fc\n" },

    { 941,   japanesePCDBCSStates },

    { 942,   "<icu:state> 0-80, 81-9f:1, a0-df, e0-fc:1, fd-ff\n"
             "<icu:state> 40-7e, 80-fc\n" },

    { 943,   "<icu:state> 0-7f, 81-9f:1, a0-df, e0-fc:1\n"
             "<icu:state> 40-7e, 80-fc\n" },

    { 944,   "<icu:state> 0-80, 81-bf:1, c0-ff\n"
             "<icu:state> 40-7e, 80-fe\n" },

    /* the last line used to lack its "\n", unlike all other entries (compare 948) */
    { 946,   "<icu:state> 0-80, 81-fb:1,fc:2,fd-ff\n"
             "<icu:state> 40-7e, 80-fe\n"
             "<icu:state> 80-fe.u,fc\n" },

    { 947,   "<icu:state> 0-7f, 80-fe:1\n"
             "<icu:state> 40-7e, 80-fe\n" },

    { 948,   "<icu:state> 0-80, 81-fb:1,fc:2,fd-fe\n"
             "<icu:state> 40-7e, 80-fe\n"
             "<icu:state> 80-fe.u,fc\n" },

    { 949,   "<icu:state> 0-84, 8f-fe:1\n"
             "<icu:state> 40-7e, 80-fe\n" },

    { 950,   "<icu:state> 0-7f, 81-fe:1\n"
             "<icu:state> 40-7e, 81-fe\n" },

    { 954,   "<icu:state> 0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n"
             "<icu:state> a1-fe\n"
             "<icu:state> a1-e4\n"
             "<icu:state> a1-fe:1, a1:4\n"
             "<icu:state> a1-fe.u\n" },

    { 955,   "<icu:state> 0-20:2, 21-7e:1, 7f-ff:2\n"
             "<icu:state> 21-7e\n"
             "<icu:state>\n" },

    { 963,   "<icu:state> 0-20:2, 21-7e:1, 7f-ff:2\n"
             "<icu:state> 21-7e\n"
             "<icu:state>\n" },

    { 964,   "<icu:state> 0-8d, 8e:2, 90-9f, a1-fe:1, aa-c1:5, c3:5, fe:5\n"
             "<icu:state> a1-fe\n"
             "<icu:state> a1-b0:3, a1:4, a2:8, a3-ab:4, ac:7, ad:6, ae-b0:4\n"
             "<icu:state> a1-fe:1\n"
             "<icu:state> a1-fe:5\n"
             "<icu:state> a1-fe.u\n"
             "<icu:state> a1-a4:1, a5-fe:5\n"
             "<icu:state> a1-e2:1, e3-fe:5\n"
             "<icu:state> a1-f2:1, f3-fe:5\n" },

    { 970,   "<icu:state> 0-9f, a1-fe:1\n"
             "<icu:state> a1-fe\n" },

    { 1363,  "<icu:state> 0-7f, 81-fe:1\n"
             "<icu:state> 40-7e, 80-fe\n" },

    { 1350,  "<icu:state> 0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n"
             "<icu:state> a1-fe\n"
             "<icu:state> a1-e4\n"
             "<icu:state> a1-fe:1, a1:4, a3-a5:4, a8:4, ac-af:4, ee-f2:4\n"
             "<icu:state> a1-fe.u\n" },

    { 1351,  "<icu:state> 0-ff:2, 81-9f:1, e0-fc:1\n"
             "<icu:state> 40-7e, 80-fc\n"
             "<icu:state>\n" },

    { 1370,  "<icu:state> 0-80, 81-fe:1\n"
             "<icu:state> 40-7e, 81-fe\n" },

    { 1381,  "<icu:state> 0-84, 8c-fe:1\n"
             "<icu:state> a1-fe\n" },

    { 1383,  "<icu:state> 0-9f, a1-fe:1\n"
             "<icu:state> a1-fe\n" },

    { 1385,  "<icu:state> 0-ff:2,81-fe:1\n"
             "<icu:state> 40-7e, 80-fe\n"
             "<icu:state>\n" },

    { 1386,  "<icu:state> 0-7f, 81-fe:1\n"
             "<icu:state> 40-7e, 80-fe\n" },

    { 5039,  "<icu:state> 0-80, 81-9f:1, a0-df, e0-fc:1, fd-ff\n"
             "<icu:state> 40-7e, 80-fc\n" },

    { 5050,  "<icu:state> 0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n"
             "<icu:state> a1-fe\n"
             "<icu:state> a1-e4\n"
             "<icu:state> a1-fe:1, a1:4, a3-af:4, b6:4, d6:4, da-db:4, ed-f2:4\n"
             "<icu:state> a1-fe.u\n" },

    { 5067,  "<icu:state> 0-ff:2, 21-7e:1\n"
             "<icu:state> 21-7e\n"
             "<icu:state>\n" },

    { 5478,  "<icu:state> 0-ff:2, 21-7e:1\n"
             "<icu:state> 21-7e\n"
             "<icu:state>\n" },

    { 21427, "<icu:state> 0-80:2, 81-fe:1, ff:2\n"
             "<icu:state> 40-7e, 80-fe\n"
             "<icu:state>\n" },

    { 25546, "<icu:state> 0-7f, e:1.s, f:0.s\n"
             "<icu:state> initial, 0-20:3, e:1.s, f:0.s, 21-7e:2, 7f-ff:3\n"
             "<icu:state> 0-20:1.i, 21-7e:1., 7f-ff:1.i\n"
             "<icu:state> 0-ff:1.i\n" },

    { 33722, "<icu:state> 0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n"
             "<icu:state> a1-fe\n"
             "<icu:state> a1-e4\n"
             "<icu:state> a1-fe:1, a1:4, a3-af:4, b6:4, d6:4, da-db:4, ed-f2:4\n"
             "<icu:state> a1-fe.u\n" }
};
/*
 * One entry of a conversion table.
 *
 * u: bits 31..24  fallback indicator, printed after '|' in the .ucm output
 *                   0  roundtrip
 *                   1  fallback Unicode->codepage
 *                   2  Unicode->codepage mapping whose bytes equal subchar or subchar1
 *                      (assigned in mergeMappings())
 *                   3  reverse fallback codepage->Unicode
 *    bits 23..0   Unicode code point
 *
 * b: codepage bytes with leading zeroes
 */
typedef struct Mapping {
    unsigned long u;
    unsigned long b;
} Mapping;
/* capacity of each of the two mapping arrays */
#define MAX_MAPPINGS_COUNT 200000

/* filled from the R*MAP file by processTable(); mergeMappings() later merges the toU entries into it */
static Mapping fromUMappings[MAX_MAPPINGS_COUNT];

/* filled from the T*MAP file by processTable() */
static Mapping toUMappings[MAX_MAPPINGS_COUNT];

/* number of entries in use in fromUMappings[] and toUMappings[] */
static long fromUMappingsTop;
static long toUMappingsTop;

/* substitution character bytes; 0 is treated as "not set" by processTable() and writeUCM() */
static unsigned long subchar;
static unsigned long subchar1;

/* CCSID taken from the input filename in processTable() */
static unsigned int ccsid;
/* values stored in charsetFamily */
enum {
    ASCII=0,
    EBCDIC=1,
    UNKNOWN=2
};
/* properties of the current table; reset by init(), computed by analyzeTable() */
static char minCharLength;      /* shortest byte sequence length seen */
static char maxCharLength;      /* longest byte sequence length seen */
static char charsetFamily;      /* ASCII, EBCDIC or UNKNOWN */
static char usesPUA;            /* adds "_VPUA" to the alias in writeUCM() */
static char variantLF;          /* adds "_VLF" */
static char variantASCII;       /* adds "_VASCII" */
static char variantControls;    /* adds "_VGCTRL" */
static char variantSUB;         /* adds "_VSUB" */
static char is7Bit;             /* no byte value above 0x7f was seen */
/* Reset all per-table global state before a new input table is processed. */
static void
init() {
    /* both mapping arrays are empty */
    toUMappingsTop=0;
    fromUMappingsTop=0;

    /* substitution characters and CCSID are not known yet */
    subchar1=0;
    subchar=0;
    ccsid=0;

    /* start min above and max below every possible length (1..4) so the first mapping sets both */
    minCharLength=4;
    maxCharLength=0;

    /* nothing has been detected yet */
    charsetFamily=UNKNOWN;
    is7Bit=0;
    usesPUA=0;
    variantASCII=0;
    variantControls=0;
    variantLF=0;
    variantSUB=0;
}
/*
 * Lexically compare two Mappings for sorting and merging (qsort() callback).
 *
 * Sort keys, in order: Unicode code point (u bits 23..0), codepage bytes (b),
 * fallback indicator (u bits 31..24).
 *
 * Returns a negative value, 0, or a positive value. Callers must only rely on the sign.
 *
 * All comparisons are done on the unsigned values directly. The earlier version
 * right-shifted a possibly negative long (implementation-defined) and subtracted
 * values converted to int; neither is needed to get the ordering.
 */
static int
compareMappings(const void *left, const void *right) {
    const Mapping *l=(const Mapping *)left, *r=(const Mapping *)right;
    unsigned long lValue, rValue;

    /* 1. Unicode code point */
    lValue=l->u&0xffffff;
    rValue=r->u&0xffffff;
    if(lValue!=rValue) {
        return lValue<rValue ? -1 : 1;
    }

    /* 2. codepage bytes; these may use all bits of an unsigned long */
    if(l->b!=r->b) {
        return l->b<r->b ? -1 : 1;
    }

    /* 3. fallback indicator */
    lValue=l->u>>24;
    rValue=r->u>>24;
    if(lValue!=rValue) {
        return lValue<rValue ? -1 : 1;
    }
    return 0;
}
/* Return a pointer to the first character of s that is neither a space nor a tab. */
static const char *
skipWhitespace(const char *s) {
    for(;;) {
        switch(*s) {
        case ' ':
        case '\t':
            ++s;
            continue;
        default:
            /* includes the terminating NUL */
            return s;
        }
    }
}
/*
 * Read one mapping file and store its entries into mappings[].
 *
 * f         open input file, read line by line until EOF or a line starting with "END CHARMAP"
 * mappings  output array with room for MAX_MAPPINGS_COUNT entries
 *
 * Returns the number of entries stored; the entries are sorted with compareMappings().
 *
 * Side effects: comment lines (starting with '#' or '*') that contain "for U+00xx" or
 * "for U+xxxx" set the globals subchar1 and subchar from the x'..' value on that line.
 * Any parse error prints a message to stderr and calls exit(2).
 */
static long
parseMappings(FILE *f, Mapping *mappings) {
    char line[200];
    Mapping *oldMappings;
    char *s, *end;
    long mappingsTop=0;

    oldMappings=mappings;
    while(fgets(line, sizeof(line), f)!=NULL) {
        s=(char *)skipWhitespace(line);

        /* skip empty lines */
        if(*s==0 || *s=='\n' || *s=='\r') {
            continue;
        }

        /*
         * explicit end of table;
         * strncmp() stops at the end of a short line, memcmp() would read past it
         */
        if(strncmp(s, "END CHARMAP", 11)==0) {
            break;
        }

        /* comment lines, parse substitution characters, otherwise skip them */
        if(*s=='#' || *s=='*') {
            /* get subchar1 */
            s=strstr(line, "for U+00xx");
            if(s!=NULL) {
                s=strstr(line, "x'");
                if(s!=NULL) {
                    s+=2;
                    subchar1=strtoul(s, &end, 16);
                    /* exactly two hex digits followed by the closing quote */
                    if(end!=s+2 || *end!='\'') {
                        fprintf(stderr, "error parsing subchar1 from \"%s\"\n", line);
                        exit(2);
                    }
                    continue;
                } else {
                    fprintf(stderr, "error finding subchar1 on \"%s\"\n", line);
                    exit(2);
                }
            }

            /* get subchar */
            s=strstr(line, "for U+xxxx");
            if(s!=NULL) {
                s=strstr(line, "x'");
                if(s!=NULL) {
                    s+=2;
                    subchar=strtoul(s, &end, 16);
                    /* at least two hex digits followed by the closing quote */
                    if(end<s+2 || *end!='\'') {
                        fprintf(stderr, "error parsing subchar from \"%s\"\n", line);
                        exit(2);
                    }
                    continue;
                } else {
                    fprintf(stderr, "error finding subchar on \"%s\"\n", line);
                    exit(2);
                }
            }

            continue;
        }

        /* first column: codepage bytes */
        mappings->b=strtoul(s, &end, 16);
        if(s==end || (*end!=' ' && *end!='\t')) {
            if((s+1)==end && *end=='-' && (mappings->b<=3)) {
                /* this is a special EUC format where the code set number prepends the bytes */
                unsigned long prefix=0; /* code sets 0 and 1 have no prefix byte */

                if(mappings->b==2) {
                    prefix=0x8e;
                } else if(mappings->b==3) {
                    prefix=0x8f;
                }

                /* skip the code set digit and the '-' */
                s+=2;
                mappings->b=strtoul(s, &end, 16);
                /* require an even number of hex digits, i.e., whole bytes */
                if(s==end || ((end-s)&1) || (*end!=' ' && *end!='\t')) {
                    fprintf(stderr, "error parsing EUC codepage bytes on \"%s\"\n", line);
                    exit(2);
                }
                /* put the prefix byte in front of the parsed bytes: 4 bits per hex digit */
                mappings->b|=prefix<<(4*(end-s));
            } else {
                fprintf(stderr, "error parsing codepage bytes on \"%s\"\n", line);
                exit(2);
            }
        }

        /* second column: Unicode code point */
        s=(char *)skipWhitespace(end);
        mappings->u=strtoul(s, &end, 16);
        if(s==end || (*end!=' ' && *end!='\t' && *end!='\n' && *end!='\r' && *end!=0)) {
            if(strncmp(s, "????", 4)==0 || strstr(s, "UNASSIGNED")!=NULL) {
                /* this is a non-entry, do not add it to the mapping table */
                continue;
            }
            fprintf(stderr, "error parsing Unicode code point on \"%s\"\n", line);
            exit(2);
        }

        /* keep this entry; stop before the next one could be written past the array */
        ++mappings;
        if(++mappingsTop>=MAX_MAPPINGS_COUNT) {
            fprintf(stderr, "error: too many mappings at \"%s\"\n", line);
            exit(2);
        }
    }

    /* sort the mappings */
    qsort(oldMappings, mappingsTop, sizeof(Mapping), compareMappings);

    return mappingsTop;
}
/* merge the mappings into fromUMappings and add fallback indicator values to Mapping.u bits 31..24 */
static void
mergeMappings() {
long fromUIndex, toUIndex, newFromUMappingsTop=fromUMappingsTop;
int cmp;
fromUIndex=toUIndex=0;
while(fromUIndex<fromUMappingsTop && toUIndex<toUMappingsTop) {
cmp=compareMappings(fromUMappings+fromUIndex, toUMappings+toUIndex);
if(cmp==0) {
/* equal: roundtrip, nothing to do */
++fromUIndex;
++toUIndex;
} else if(cmp<0) {
/*
* the fromU mapping does not have a toU counterpart:
* fallback Unicode->codepage
*/
if(fromUMappings[fromUIndex].b!=subchar && fromUMappings[fromUIndex].b!=subchar1) {
fromUMappings[fromUIndex++].u|=0x1000000;
} else {
fromUMappings[fromUIndex++].u|=0x2000000;
}
} else {
/*
* the toU mapping does not have a fromU counterpart:
* (reverse) fallback codepage->Unicode, copy it to the fromU table
*/
fromUMappings[newFromUMappingsTop].u=toUMappings[toUIndex].u|=0x3000000;
fromUMappings[newFromUMappingsTop++].b=toUMappings[toUIndex++].b;
}
}
/* either one or both tables are exhausted */
while(fromUIndex<fromUMappingsTop) {
/* leftover fromU mappings are fallbacks */
if(fromUMappings[fromUIndex].b!=subchar && fromUMappings[fromUIndex].b!=subchar1) {
fromUMappings[fromUIndex++].u|=0x1000000;
} else {
fromUMappings[fromUIndex++].u|=0x2000000;
}
}
while(toUIndex<toUMappingsTop) {
/* leftover toU mappings are reverse fallbacks */
fromUMappings[newFromUMappingsTop].u=toUMappings[toUIndex].u|=0x3000000;
fromUMappings[newFromUMappingsTop++].b=toUMappings[toUIndex++].b;
}
fromUMappingsTop=newFromUMappingsTop;
/* re-sort the mappings */
qsort(fromUMappings, fromUMappingsTop, sizeof(Mapping), compareMappings);
}
/*
 * Inspect the merged table in fromUMappings[] and set the global table properties:
 * minCharLength, maxCharLength, usesPUA, charsetFamily, variantLF, variantASCII,
 * variantControls, variantSUB and is7Bit.
 *
 * Lengths and PUA usage are computed from all mappings; everything else only
 * from roundtrip mappings (fallback indicator 0).
 *
 * If the charset family cannot be determined, prints an error and calls exit(3).
 */
static void
analyzeTable() {
    unsigned long u, b, f, minTwoByte=0xffff, maxTwoByte=0, oredBytes=0;
    long i, countASCII=0;
    char length;

    for(i=0; i<fromUMappingsTop; ++i) {
        f=fromUMappings[i].u>>24;
        u=fromUMappings[i].u&0xffffff;
        b=fromUMappings[i].b;

        oredBytes|=b;

        /* character length? */
        if(b<=0xff) {
            length=1;
        } else if(b<=0xffff) {
            length=2;
            /* track the range of double-byte values for the family guess below */
            if(b<minTwoByte) {
                minTwoByte=b;
            }
            if(b>maxTwoByte) {
                maxTwoByte=b;
            }
        } else if(b<=0xffffff) {
            length=3;
        } else {
            length=4;
        }
        if(length<minCharLength) {
            minCharLength=length;
        }
        if(length>maxCharLength) {
            maxCharLength=length;
        }

        /* PUA used? (U+E000..U+F8FF or U+F0000..U+10FFFF) */
        if((unsigned long)(u-0xe000)<0x1900 || (unsigned long)(u-0xf0000)<0x20000) {
            usesPUA=1;
        }

        /* only consider roundtrip mappings for the rest */
        if(f!=0) {
            continue;
        }

        /* ASCII or EBCDIC? */
        if(u==0x41) {
            if(b==0x41) {
                charsetFamily=ASCII;
            } else if(b==0xc1) {
                charsetFamily=EBCDIC;
            }
        } else if(u==0xa) {
            if(b==0xa) {
                charsetFamily=ASCII;
            } else if(b==0x25) {
                charsetFamily=EBCDIC;
                variantLF=0;
            } else if(b==0x15) {
                charsetFamily=EBCDIC;
                variantLF=1;
            }
        }

        /* US-ASCII? (the 94 graphic characters U+0021..U+007E) */
        if((unsigned long)(u-0x21)<94) {
            if(u==b) {
                ++countASCII;
            } else {
                variantASCII=1;
            }
        } else if(u<0x20 || u==0x7f) {
            /* non-ISO C0 controls? */
            if(u!=b) {
                /* IBM PC rotation of SUB and other controls: 0x1a->0x7f->0x1c->0x1a */
                if((u==0x1a && b==0x7f) || (u==0x1c && b==0x1a) || (u==0x7f && b==0x1c)) {
                    charsetFamily=ASCII;
                    variantSUB=1;
                } else {
                    variantControls=1;
                }
            }
        }
    }

    is7Bit= oredBytes<=0x7f;

    if(charsetFamily==UNKNOWN) {
        if(minCharLength==2 && maxCharLength==2) {
            /*
             * guess the charset family for DBCS according to typical byte distributions
             *
             * Each range test must combine its two bounds with &&. With || the
             * bounds were always satisfied, every table with maxTwoByte<=0xfefe was
             * classified as ASCII, and the EBCDIC branch was unreachable.
             */
            if( ((0x2020<=minTwoByte && minTwoByte<=0x217e) && maxTwoByte<=0x7e7e) ||
                ((0xa0a0<=minTwoByte && minTwoByte<=0xa1fe) && maxTwoByte<=0xfefe) ||
                ((0x8140<=minTwoByte && minTwoByte<=0x81fe) && maxTwoByte<=0xfefe)
            ) {
                charsetFamily=ASCII;
            } else if((minTwoByte==0x4040 || (0x4141<=minTwoByte && minTwoByte<=0x41fe)) && maxTwoByte<=0xfefe) {
                charsetFamily=EBCDIC;
            }
        }
        if(charsetFamily==UNKNOWN) {
            fprintf(stderr, "error: unable to determine the charset family\n");
            exit(3);
        }
    }

    /* reset variant indicators if they do not apply */
    if(charsetFamily!=ASCII || minCharLength!=1) {
        variantASCII=variantSUB=variantControls=0;
    } else if(countASCII!=94) {
        /* if there are not 94 mappings for ASCII graphic characters, then set variantASCII */
        variantASCII=1;
    }

    if(charsetFamily!=EBCDIC || minCharLength!=1) {
        variantLF=0;
    }
}
static int
getSubchar(const char *name) {
int i;
for(i=0; i<sizeof(knownSubchars)/sizeof(knownSubchars[0]); ++i) {
if(strcmp(name, knownSubchars[i].name)==0) {
subchar=knownSubchars[i].subchar;
subchar1=knownSubchars[i].subchar1;
return 1;
}
}
return 0;
}
/*
 * Read the header of a UPMAP/UXMAP file and extract the substitution characters.
 *
 * Lines are read until EOF or until a line starts with "CHARMAP". A line whose first
 * non-blank text is "<subchar>" sets the global subchar; "<subchar1>" or "#<subchar1>"
 * sets the global subchar1. The value is a sequence of \xHH escapes (two hex digits
 * each) which are combined into one number, first byte most significant.
 * A malformed escape prints an error and calls exit(2).
 *
 * Prefixes are tested with strncmp(), which stops at the end of a short line;
 * memcmp() would compare bytes beyond the terminating NUL.
 */
static void
getSubcharFromUPMAP(FILE *f) {
    char line[200];
    char *s, *end;
    unsigned long *p;
    unsigned long value, bytes;

    while(fgets(line, sizeof(line), f)!=NULL && strncmp(line, "CHARMAP", 7)!=0) {
        s=(char *)skipWhitespace(line);

        /* skip empty lines */
        if(*s==0 || *s=='\n' || *s=='\r') {
            continue;
        }

        /* look for variations of subchar entries */
        if(strncmp(s, "<subchar>", 9)==0) {
            s=(char *)skipWhitespace(s+9);
            p=&subchar;
        } else if(strncmp(s, "<subchar1>", 10)==0) {
            s=(char *)skipWhitespace(s+10);
            p=&subchar1;
        } else if(strncmp(s, "#<subchar1>", 11)==0) {
            s=(char *)skipWhitespace(s+11);
            p=&subchar1;
        } else {
            continue;
        }

        /* get the value and store it in *p */
        bytes=0;
        while(s[0]=='\\' && s[1]=='x') {
            value=strtoul(s+2, &end, 16);
            /* each escape is exactly 4 characters: backslash, x, two hex digits */
            s+=4;
            if(end!=s) {
                fprintf(stderr, "error parsing UPMAP subchar from \"%s\"\n", line);
                exit(2);
            }
            bytes=(bytes<<8)|value;
        }
        *p=bytes;
    }
}
/*
 * Return the state table text from knownStateTables[] for the global ccsid,
 * or NULL if there is no entry for it. The first matching entry wins.
 */
static const char *
getStateTable() {
    int index=0;

    while(index<sizeof(knownStateTables)/sizeof(knownStateTables[0])) {
        if(knownStateTables[index].ccsid==ccsid) {
            return knownStateTables[index].table;
        }
        ++index;
    }
    return NULL;
}
/*
 * Format the codepage bytes b as a NUL-terminated sequence of "\xHH" escapes into s.
 * The number of escapes (1..4) follows from the magnitude of b; the lead byte is
 * printed first. s needs room for 4 characters per byte plus the NUL.
 */
static void
writeBytes(char *s, unsigned long b) {
    int shift;

    /* bit position of the lead byte */
    if(b<=0xff) {
        shift=0;
    } else if(b<=0xffff) {
        shift=8;
    } else if(b<=0xffffff) {
        shift=16;
    } else {
        shift=24;
    }

    /* the lead byte is everything above the trail bytes and is not masked */
    s+=sprintf(s, "\\x%02lX", b>>shift);

    /* trail bytes, from most to least significant */
    while(shift>0) {
        shift-=8;
        s+=sprintf(s, "\\x%02lX", (b>>shift)&0xff);
    }
}
/*
 * Write the complete .ucm file: header, state table (if any) and CHARMAP section.
 *
 * f        output file, already opened for writing
 * ucmname  value for <code_set_name>; does not have a path or .ucm
 * rpname   name of the R*MAP source file, for the header comment
 * tpname   name of the T*MAP source file, for the header comment
 *
 * Reads the global table properties set by analyzeTable(), the globals subchar,
 * subchar1 and ccsid, and the merged table in fromUMappings[].
 *
 * Format fix: subchar1 is an unsigned long and is now printed with %lX (it was
 * printed with %X), and the char-typed lengths are converted to unsigned int for %u.
 */
static void
writeUCM(FILE *f, const char *ucmname, const char *rpname, const char *tpname) {
    char buffer[100];
    const char *s;
    long i;

    /* write the header */
    fprintf(f,
        "# *******************************************************************************\n"
        "# *\n"
        "# * Copyright (C) 1995-2001, International Business Machines\n"
        "# * Corporation and others. All Rights Reserved.\n"
        "# *\n"
        "# *******************************************************************************\n"
        "#\n"
        "# File created by rptp2ucm (compiled on %s)\n"
        "# from source files %s and %s\n"
        "#\n", __DATE__, rpname, tpname);

    /* ucmname does not have a path or .ucm */
    fprintf(f, "<code_set_name> \"%s\"\n", ucmname);

    fputs("<char_name_mask> \"AXXXX\"\n", f);
    fprintf(f, "<mb_cur_max> %u\n", (unsigned int)maxCharLength);
    fprintf(f, "<mb_cur_min> %u\n", (unsigned int)minCharLength);

    /* converter class, derived from the byte sequence lengths and the charset family */
    if(maxCharLength==1) {
        fputs("<uconv_class> \"SBCS\"\n", f);
    } else if(maxCharLength==2) {
        if(minCharLength==1) {
            if(charsetFamily==EBCDIC) {
                fputs("<uconv_class> \"EBCDIC_STATEFUL\"\n", f);
            } else {
                fputs("<uconv_class> \"MBCS\"\n", f);
            }
        } else if(minCharLength==2) {
            fputs("<uconv_class> \"DBCS\"\n", f);
        } else {
            fputs("<uconv_class> \"MBCS\"\n", f);
        }
    } else {
        fputs("<uconv_class> \"MBCS\"\n", f);
    }

    /* substitution characters are only written when known (non-zero) */
    if(subchar!=0) {
        writeBytes(buffer, subchar);
        fprintf(f, "<subchar> %s\n", buffer);
    }

    if(subchar1!=0) {
        fprintf(f, "<subchar1> \\x%02lX\n", subchar1);
    }

    /* write charset family */
    if(charsetFamily==ASCII) {
        fputs("<icu:charsetFamily> \"ASCII\"\n", f);
    } else {
        fputs("<icu:charsetFamily> \"EBCDIC\"\n", f);
    }

    /* write alias describing the codepage */
    sprintf(buffer, "<icu:alias> \"ibm-%u", ccsid);
    if(!usesPUA && !variantLF && !variantASCII && !variantControls && !variantSUB) {
        strcat(buffer, "_STD\"\n\n");
    } else {
        /* add variant indicators in this fixed order: VASCII, VGCTRL, VLF, VSUB, VPUA */
        if(variantASCII) {
            strcat(buffer, "_VASCII");
        }
        if(variantControls) {
            strcat(buffer, "_VGCTRL");
        }
        if(variantLF) {
            strcat(buffer, "_VLF");
        }
        if(variantSUB) {
            strcat(buffer, "_VSUB");
        }
        if(usesPUA) {
            strcat(buffer, "_VPUA");
        }
        strcat(buffer, "\"\n\n");
    }
    fputs(buffer, f);

    /* write the state table - <icu:state> */
    s=getStateTable();
    if(s!=NULL) {
        fputs(s, f);
        fputs("\n", f);
    } else if(is7Bit) {
        /* no known table for this CCSID, but all bytes are <=0x7f */
        fputs("<icu:state> 0-7f\n\n", f);
    }

    /* write the mappings: code point, bytes, fallback indicator */
    fputs("CHARMAP\n", f);
    for(i=0; i<fromUMappingsTop; ++i) {
        writeBytes(buffer, fromUMappings[i].b);
        fprintf(f, "<U%04lX> %s |%lu\n", fromUMappings[i].u&0xffffff, buffer, fromUMappings[i].u>>24);
    }
    fputs("END CHARMAP\n", f);
}
/*
 * Convert one table: read the R*MAP file named by arg and its T*MAP counterpart,
 * merge and analyze the mappings, and write "ibm-<ccsid>_<suffix>-<YEAR>.ucm"
 * into the current directory.
 *
 * arg  path of the RPMAP/RXMAP file. The basename must have the form
 *      XXXXYYYY.RPMAPnnn (8 hex digits, '.', RPMAP/RXMAP in either case, 3 characters),
 *      where one 4-digit half is 13488 or 17584 and the other half is the CCSID.
 *
 * Errors are reported on stderr and terminate the program: exit(1) for filename and
 * file-open problems, exit(4) if the output file cannot be opened; the parsing and
 * analysis helpers use further exit codes.
 *
 * Changes from the earlier version:
 *  - getSubchar() is called with the table name before "-<YEAR>" is appended.
 *    The names in knownSubchars[] have no year suffix, so the lookup could never match.
 *  - arg is checked against the size of filename[] before it is copied.
 *  - toupper() gets unsigned char values, as <ctype.h> requires.
 */
static void
processTable(const char *arg) {
    char filename[1024], tpname[32];
    const char *basename, *s;
    char *end;
    FILE *rpmap, *tpmap, *ucm;
    unsigned long value, unicode;
    int length, haveSubchar;

    init();

    /* arg is copied into filename[] below */
    if(strlen(arg)>=sizeof(filename)) {
        fprintf(stderr, "error: the filename \"%s\" is too long\n", arg);
        exit(1);
    }

    /* separate path and basename */
    basename=strrchr(arg, '/');
    if(basename==NULL) {
        basename=strrchr(arg, '\\');
        if(basename==NULL) {
            basename=arg;
        } else {
            ++basename;
        }
    } else {
        ++basename;
        /* a backslash after the last slash also ends the path */
        s=strrchr(arg, '\\');
        if(s!=NULL && ++s>basename) {
            basename=s;
        }
    }

    /* is this a standard RPMAP filename? */
    value=strtoul(basename, &end, 16);
    if( strlen(basename)!=17 ||
        (memcmp(basename+9, "RPMAP", 5)!=0 && memcmp(basename+9, "rpmap", 5)!=0 &&
         memcmp(basename+9, "RXMAP", 5)!=0 && memcmp(basename+9, "rxmap", 5)!=0) ||
        (end-basename)!=8 ||
        *end!='.'
    ) {
        fprintf(stderr, "error: \"%s\" is not a standard RPMAP filename\n", basename);
        exit(1);
    }

    /* is this really a Unicode conversion table? - get the CCSID */
    unicode=value&0xffff;
    if(unicode==13488 || unicode==17584) {
        ccsid=(unsigned int)(value>>16);
    } else {
        unicode=value>>16;
        if(unicode==13488 || unicode==17584) {
            ccsid=(unsigned int)(value&0xffff);
        } else {
            fprintf(stderr, "error: \"%s\" is not a Unicode conversion table\n", basename);
            exit(1);
        }
    }

    /* try to open the RPMAP file */
    rpmap=fopen(arg, "r");
    if(rpmap==NULL) {
        fprintf(stderr, "error: unable to open \"%s\"\n", arg);
        exit(1);
    }

    /* try to open the TPMAP file */
    strcpy(filename, arg);
    length=(int)strlen(filename);

    /* guess the TPMAP filename; note that above we have checked the format of the basename */
    /* replace the R in RPMAP by T, keep upper- or lowercase */
    if(filename[length-8]=='R') {
        filename[length-8]='T';
    } else {
        filename[length-8]='t';
    }

    /* reverse the CCSIDs */
    memcpy(filename+length-17, basename+4, 4);
    memcpy(filename+length-13, basename, 4);

    /* first, keep the same suffix */
    tpmap=fopen(filename, "r");
    if(tpmap==NULL) {
        /* next, try reducing the second to last digit by 1 */
        --filename[length-2];
        tpmap=fopen(filename, "r");
        if(tpmap==NULL) {
            /* there is no TPMAP */
            fprintf(stderr, "error: unable to find the TPMAP file for \"%s\"\n", arg);
            exit(1);
        }
    }
    /* remember the TPMAP basename (17 characters) for the .ucm header */
    strcpy(tpname, filename+length-17);

    /* parse both files */
    fromUMappingsTop=parseMappings(rpmap, fromUMappings);
    toUMappingsTop=parseMappings(tpmap, toUMappings);
    fclose(tpmap);
    fclose(rpmap);

    /* if there is no subchar, then try to get it from the corresponding UPMAP */
    if(subchar==0) {
        FILE *f;

        /* restore the RPMAP filename and just replace the R by U */
        strcpy(filename+length-17, basename);
        if(filename[length-8]=='R') {
            filename[length-8]='U';
        } else {
            filename[length-8]='u';
        }

        f=fopen(filename, "r");
        if(f==NULL) {
            /* try reversing the CCSIDs */
            memcpy(filename+length-17, basename+4, 4);
            memcpy(filename+length-13, basename, 4);
            f=fopen(filename, "r");
        }
        if(f!=NULL) {
            getSubcharFromUPMAP(f);
            fclose(f);
        }
    }

    /* generate the table name "ibm-<ccsid>_<suffix>" - necessary before getSubchar() */
    length=sprintf(filename, "ibm-%u_", ccsid);

    /* uppercase and append the suffix */
    filename[length++]=(char)toupper((unsigned char)basename[10]); /* P or X */
    filename[length++]=(char)toupper((unsigned char)basename[14]); /* last 3 suffix characters */
    filename[length++]=(char)toupper((unsigned char)basename[15]);
    filename[length++]=(char)toupper((unsigned char)basename[16]);
    filename[length]=0;

    /*
     * find the subchar if still necessary - necessary before merging for correct |2
     * This must use the name without the year: knownSubchars[] names look like "850_P100".
     */
    haveSubchar= subchar!=0 || getSubchar(filename+4);

    /* concatenate year */
    filename[length++]='-';
    filename[length]=0;
    strcat(filename, YEAR);

    if(!haveSubchar) {
        fprintf(stderr, "warning: missing subchar in \"%s\" (CCSID=0x%04X)\n", filename, ccsid);
    }

    /* merge the mappings */
    mergeMappings();

    /* analyze the conversion table */
    analyzeTable();

    /* open the .ucm file */
    strcat(filename, ".ucm");
    ucm=fopen(filename, "w");
    if(ucm==NULL) {
        fprintf(stderr, "error: unable to open output file \"%s\"\n", filename);
        exit(4);
    }

    /* remove the .ucm from the filename for the following processing */
    filename[strlen(filename)-4]=0;

    /* write the .ucm file */
    writeUCM(ucm, filename, basename, tpname);
    fclose(ucm);
}
/*
 * Entry point: every command line argument is the name of an RPMAP/RXMAP file
 * and is handed to processTable() in order. Without arguments, print the usage
 * line to stderr and exit with code 1.
 */
extern int
main(int argc, const char *argv[]) {
    int i;

    if(argc<2) {
        fprintf(stderr,
            "usage: %s { rpmap/rxmap-filename }+\n",
            argv[0]);
        exit(1);
    }

    for(i=1; i<argc; ++i) {
        processTable(argv[i]);
    }

    return 0;
}