ICU-502 clean up 'unassigned' handling and callback calls

X-SVN-Rev: 1871
2000-07-13 23:55:33 +00:00 · 2000-07-13 23:55:33 +00:00 · 7d721ba16c
commit 7d721ba16c
parent 2c9d62de49
10 changed files with 204 additions and 129 deletions
--- a/icu4c/source/common/ucnv2022.c
+++ b/icu4c/source/common/ucnv2022.c
@ -591,7 +591,7 @@ static UChar32 T_UConverter_getNextUChar_ISO_2022(UConverterToUnicodeArgs* args,
  if  (args->sourceLimit < args->source)
    {
      *err = U_ILLEGAL_ARGUMENT_ERROR;
-      return 0xFFFD;
+      return 0xffff;
    }
  
  for (;;)
@ -621,7 +621,7 @@ static UChar32 T_UConverter_getNextUChar_ISO_2022(UConverterToUnicodeArgs* args,
      args->source++;
    }
  
-  return 0xFFFD;
+  return 0xffff;
 }

 static const UConverterImpl _ISO2022Impl={
@ -711,7 +711,7 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverterToUnicodeArgs *args,
              targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);

              /*writing the UniChar to the output stream */
-              if (targetUniChar != missingUCharMarker)
+              if (targetUniChar < 0xfffe)
                {
                  /*writes the UniChar to the output stream */
                  args->target[myTargetIndex++] = targetUniChar;
@ -722,8 +722,19 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverterToUnicodeArgs *args,
                  const char* saveSource = args->source;
                  UChar* saveTarget = args->target;
                  int32_t *saveOffsets = args->offsets;
+                  UConverterCallbackReason reason;
+
+                  if (targetUniChar == 0xfffe)
+                  {
+                    reason = UCNV_UNASSIGNED;
+                    *err = U_INVALID_CHAR_FOUND;
+                  }
+                  else
+                  {
+                    reason = UCNV_ILLEGAL;
+                    *err = U_ILLEGAL_CHAR_FOUND;
+                  }

-                  *err = U_INVALID_CHAR_FOUND;
                  if (mySourceChar > 0xff)
                    {
                      args->converter->invalidCharLength = 2;
@ -740,9 +751,9 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverterToUnicodeArgs *args,
                  args->source += mySourceIndex;
                  ToU_CALLBACK_MACRO(args->converter->toUContext,
                                     args,
-                                     args->source,
-                                     1, 
-                                     UCNV_UNASSIGNED,
+                                     args->converter->invalidCharBuffer,
+                                     args->converter->invalidCharLength,
+                                     reason,
                                     err);

                  args->source = saveSource;
@ -828,7 +839,7 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeAr
              targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);

              /*writing the UniChar to the output stream */
-              if (targetUniChar != missingUCharMarker)
+              if (targetUniChar < 0xfffe)
                {
                  /*writes the UniChar to the output stream */
                  {
@ -846,8 +857,19 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeAr
                  const char* saveSource = args->source;
                  UChar* saveTarget = args->target;
                  int32_t *saveOffsets = args->offsets;
-                  
-                  *err = U_INVALID_CHAR_FOUND;
+                  UConverterCallbackReason reason;
+
+                  if (targetUniChar == 0xfffe)
+                  {
+                    reason = UCNV_UNASSIGNED;
+                    *err = U_INVALID_CHAR_FOUND;
+                  }
+                  else
+                  {
+                    reason = UCNV_ILLEGAL;
+                    *err = U_ILLEGAL_CHAR_FOUND;
+                  }
+
                  if (mySourceChar > 0xFF)
                    {
                      args->converter->invalidCharLength = 2;
@ -869,7 +891,7 @@ void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeAr
                                     args,
                                     args->source,
                                     1, 
-                                     UCNV_UNASSIGNED,
+                                     reason,
                                     err);                  
                  
                  args->source = saveSource;
@ -1160,24 +1182,25 @@ UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverterToUnicodeArgs* args,
  /*safe keeps a ptr to the beginning in case we need to step back*/
  
  /*Input boundary check*/
-  if (args->source+1 > args->sourceLimit) 
+  if (args->source >= args->sourceLimit) 
    {
      *err = U_INDEX_OUTOFBOUNDS_ERROR;
-      return 0xFFFD;
+      return 0xffff;
    }
  
  /*Checks to see if with have SI/SO shifters
   if we do we change the mode appropriately and we consume the byte*/
-  if ((*(args->source) == UCNV_SI) || (*(args->source) == UCNV_SO)) 
+  while ((*(args->source) == UCNV_SI) || (*(args->source) == UCNV_SO)) 
    {
      args->converter->mode = *(args->source);
      args->source++;
+      sourceInitial = args->source;
      
      /*Rechecks boundary after consuming the shift sequence*/
-      if (args->source+1 > args->sourceLimit) 
+      if (args->source >= args->sourceLimit) 
        {
          *err = U_INDEX_OUTOFBOUNDS_ERROR;
-          return 0xFFFD;
+          return 0xffff;
        }
    }
  
@ -1195,7 +1218,7 @@ UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverterToUnicodeArgs* args,
      if ((args->source + 2) > args->sourceLimit) 
        {
          *err = U_TRUNCATED_CHAR_FOUND;
-          return 0xFFFD;
+          return 0xffff;
        }

      myUChar = ucmp16_getu( (&(args->converter->sharedData->table->dbcs.toUnicode)),
@ -1204,28 +1227,34 @@ UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverterToUnicodeArgs* args,
      args->source += 2;
    }
  
-  if (myUChar != 0xFFFD) return myUChar;
+  if (myUChar < 0xfffe) return myUChar;
  else
    {      
-      /*rewinds source*/
      /* HSYS: Check logic here */
-      const char* sourceFinal = args->source;
      UChar* myUCharPtr = &myUChar;
-      
-      *err = U_INVALID_CHAR_FOUND;
-      args->source = sourceInitial;
-      
+      UConverterCallbackReason reason;
+
+      if (myUChar == 0xfffe)
+      {
+        reason = UCNV_UNASSIGNED;
+        *err = U_INVALID_CHAR_FOUND;
+      }
+      else
+      {
+        reason = UCNV_ILLEGAL;
+        *err = U_ILLEGAL_CHAR_FOUND;
+      }
+
      /*It's is very likely that the ErrorFunctor will write to the
       *internal buffers */
      args->target = myUCharPtr;
      args->targetLimit = myUCharPtr + 1;
-      args->source = sourceFinal;

      args->converter->fromCharErrorBehaviour(args->converter->toUContext,
                                    args,
-                                    sourceFinal,
-                                    1,
-                                    UCNV_UNASSIGNED,
+                                    sourceInitial,
+                                    args->source - sourceInitial,
+                                    reason,
                                    err);
      
      /*makes the internal caching transparent to the user*/
--- a/icu4c/source/common/ucnv_bld.c
+++ b/icu4c/source/common/ucnv_bld.c
@ -106,7 +106,7 @@ isCnvAcceptable(void *context,
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x76 &&
        pInfo->dataFormat[3]==0x74 &&
-        (pInfo->formatVersion[0]==4 || pInfo->formatVersion[0]==5);
+        pInfo->formatVersion[0]==5;
 }

 #define DATA_TYPE "cnv"
@ -418,7 +418,7 @@ UConverter *

 UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *status)
 {
-    UDataInfo info;
+    /* UDataInfo info; -- necessary only if some converters have different formatVersion */
    const uint8_t *raw = (const uint8_t *)udata_getMemory(pData);
    const UConverterStaticData *source = (const UConverterStaticData *) raw;
    UConverterSharedData *data;
@ -435,6 +435,8 @@ UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *s
        return NULL;
    }

+#if 0
+    /* necessary only if some converters have different formatVersion; now everything is at version 5 */
    /* test for the format version: MBCS is at version 5, the rest still at 4 */
    info.size=sizeof(UDataInfo);
    udata_getInfo(pData, &info);
@ -442,6 +444,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *s
        *status = U_INVALID_TABLE_FORMAT;
        return NULL;
    }
+#endif

    data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData));
    if(data == NULL) {
--- a/icu4c/source/common/ucnv_cnv.h
+++ b/icu4c/source/common/ucnv_cnv.h
@ -54,8 +54,16 @@ union UConverterTable

 U_CDECL_BEGIN

+/* this is used in fromUnicode DBCS tables as an "unassigned" marker */
 #define missingCharMarker 0xFFFF
-#define missingUCharMarker 0xFFFD
+
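+/*
+ * #define missingUCharMarker 0xfffe
+ *
+ * toUnicode tables use two marker values instead of a single one:
+ * U+fffe "unassigned" (reported as UCNV_UNASSIGNED / U_INVALID_CHAR_FOUND)
+ * U+ffff "illegal" (reported as UCNV_ILLEGAL / U_ILLEGAL_CHAR_FOUND)
+ */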

 #define FromU_CALLBACK_MACRO(context, args, codeUnits, length, codePoint, reason, err) \
              if (args->converter->fromUCharErrorBehaviour == (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_STOP) break;\
--- a/icu4c/source/common/ucnv_lmb.c
+++ b/icu4c/source/common/ucnv_lmb.c
@ -967,7 +967,7 @@ GetUniFromLMBCSUni(char const ** ppLMBCSin)  /* Called with LMBCS-style Unicode
     if (args->source+index > args->sourceLimit){\
         *err = U_TRUNCATED_CHAR_FOUND;\
         args->source = saveSource;\
-         return missingUCharMarker;}
+         return 0xffff;}


 /* Return the Unicode representation for the current LMBCS character
@ -990,7 +990,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs*   args,
   if (args->source >= args->sourceLimit)
   {
      *err = U_ILLEGAL_ARGUMENT_ERROR;
-      return missingUCharMarker;
+      return 0xffff;
   }
   /* Grab first byte & save address for error recovery */
   CurByte = *((ulmbcs_byte_t  *) (saveSource = args->source++));
@ -1133,7 +1133,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs*   args,
         }
      }
   }
-   if (((uint32_t)uniChar - 0xfffd) <= 2) /* 0xfffd<=uniChar<=0xffff, was: uniChar == missingUCharMarker */
+   if (((uint32_t)uniChar - 0xfffe) <= 1) /* 0xfffe<=uniChar<=0xffff */
   {
       /*It is very likely that the ErrorFunctor will write to the
       *internal buffers */
@ -1141,10 +1141,21 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs*   args,
      /* This code needs updating when new error callbacks are installed */

      UChar * pUniChar = (UChar *)&uniChar;
-      *err = U_INVALID_CHAR_FOUND;
+      UConverterCallbackReason reason;
+
+      if (uniChar == 0xfffe)
+      {
+        reason = UCNV_UNASSIGNED;
+        *err = U_INVALID_CHAR_FOUND;
+      }
+      else
+      {
+        reason = UCNV_ILLEGAL;
+        *err = U_ILLEGAL_CHAR_FOUND;
+      }
+
      args->target = pUniChar;
      args->targetLimit = pUniChar + 1;
-      args->source = saveSource;
      args->flush = TRUE;
      args->offsets = NULL;  
      args->size = sizeof(args);
@ -1152,7 +1163,7 @@ _LMBCSGetNextUCharWorker(UConverterToUnicodeArgs*   args,
                                    args,
                                    saveSource,
                                    args->sourceLimit - saveSource,
-                                    UCNV_UNASSIGNED,
+                                    reason,
                                    err);
      args->source = saveSource;
   }
@ -1237,7 +1248,7 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs*    args,
      }
      if (U_SUCCESS(*err))
      {
-         if (uniChar != missingUCharMarker)
+         if (uniChar < 0xfffe)
         {
            *(args->target)++ = uniChar;
            if(args->offsets)
@ -1245,10 +1256,14 @@ _LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs*    args,
               *(args->offsets)++ = saveSource - pStartLMBCS;
            }
         }
-         else
+         else if (uniChar == 0xfffe)
         {
            *err = U_INVALID_CHAR_FOUND;
         }
+         else /* if (uniChar == 0xffff) */
+         {
+            *err = U_ILLEGAL_CHAR_FOUND;
+         }
      }
   }
   /* if target ran out before source, return U_INDEX_OUTOFBOUNDS_ERROR */
--- a/icu4c/source/common/ucnv_utf.c
+++ b/icu4c/source/common/ucnv_utf.c
@ -178,7 +178,7 @@ void T_UConverter_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
                                     args,
                                     args->converter->invalidCharBuffer,
                                     args->converter->invalidCharLength,
-                                     UCNV_UNASSIGNED,
+                                     UCNV_ILLEGAL,
                                     err);
                  
                  args->source = saveSource;
@ -569,7 +569,7 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
  if (args->source >= args->sourceLimit) 
    {
      *err = U_INDEX_OUTOFBOUNDS_ERROR;
-      return 0xFFFD;
+      return 0xffff;
    }
  
  myByte = (uint8_t)*(args->source++);
@ -587,7 +587,7 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
  if ((args->source + extraBytesToWrite - 1) > args->sourceLimit)
    {
      *err = U_TRUNCATED_CHAR_FOUND;
-      return 0xFFFD;
+      return 0xffff;
    }
  else
    {
@ -635,24 +635,20 @@ UChar32 T_UConverter_getNextUChar_UTF8(UConverterToUnicodeArgs *args,

 CALL_ERROR_FUNCTION:
  {      
-    /*rewinds source*/
-    const char* sourceFinal = args->source;
    UChar myUChar = (UChar)ch; /* ### TODO: this is a hack until we prepare the callbacks for code points */
    UChar* myUCharPtr = &myUChar;
    
    *err = U_ILLEGAL_CHAR_FOUND;
-    args->source = sourceInitial;
    
    /*It is very likely that the ErrorFunctor will write to the
     *internal buffers */
    args->target = myUCharPtr;
    args->targetLimit = myUCharPtr + 1;
-    args->source = sourceFinal;
    args->converter->fromCharErrorBehaviour(args->converter->toUContext,
                                    args,
-                                    sourceFinal,
-                                    args->sourceLimit-sourceFinal,
-                                    UCNV_UNASSIGNED,
+                                    sourceInitial,
+                                    args->source-sourceInitial,
+                                    UCNV_ILLEGAL,
                                    err);

    
@ -820,7 +816,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_BE(UConverterToUnicodeArgs* args,
          *err = U_TRUNCATED_CHAR_FOUND;
        }
      
-      return 0xFFFD;
+      return 0xffff;
    }
  
  
@ -834,7 +830,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_BE(UConverterToUnicodeArgs* args,

    if (args->source+2 > args->sourceLimit) {
      *err = U_TRUNCATED_CHAR_FOUND;
-      return 0xFFFD;
+      return 0xffff;
    }

    /* get the second surrogate and assemble the code point */
@ -1009,7 +1005,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_LE(UConverterToUnicodeArgs* args,
          *err = U_TRUNCATED_CHAR_FOUND;
        }
      
-      return 0xFFFD;
+      return 0xffff;
    }
  

@ -1023,7 +1019,7 @@ UChar32 T_UConverter_getNextUChar_UTF16_LE(UConverterToUnicodeArgs* args,

    if (args->source+2 > args->sourceLimit) {
      *err = U_TRUNCATED_CHAR_FOUND;
-      return 0xFFFD;
+      return 0xffff;
    }

    /* get the second surrogate and assemble the code point */
--- a/icu4c/source/common/ucnvlat1.c
+++ b/icu4c/source/common/ucnvlat1.c
@ -128,7 +128,7 @@ static void   T_UConverter_fromUnicode_LATIN_1 (UConverterFromUnicodeArgs * args
                                     args->converter->invalidUCharLength,
                                     (UChar32) (args->converter->invalidUCharLength == 2 ? 
                                         UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0], 
-                                                              args->converter->invalidUCharBuffer[2]) 
+                                                              args->converter->invalidUCharBuffer[1]) 
                                                : args->converter->invalidUCharBuffer[0]),
                                     reason,
                                     err);
@ -168,7 +168,7 @@ static UChar32 T_UConverter_getNextUChar_LATIN_1(UConverterToUnicodeArgs* args,
  if (args->source+1 > args->sourceLimit) 
    {
      *err = U_INDEX_OUTOFBOUNDS_ERROR;
-      return 0xFFFD;
+      return 0xffff;
    }

  /* make sure that we zero-extend, not sign-extend, the byte */
--- a/icu4c/source/common/ucnvsbcs.c
+++ b/icu4c/source/common/ucnvsbcs.c
@ -82,7 +82,7 @@ void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
          /*gets the corresponding UniChar */
          targetUniChar = myToUnicode[(unsigned char) mySource[mySourceIndex++]];

-          if (targetUniChar != missingUCharMarker)
+          if (targetUniChar < 0xfffe)
            {
              /* writes the UniChar to the output stream */
              myTarget[myTargetIndex++] = targetUniChar;
@ -93,19 +93,30 @@ void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
                  (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
              {
                  /* Look up in the fallback table first */
-                  targetUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
-                  if (targetUniChar != missingUCharMarker)
+                  UChar fallbackUniChar = myToUnicodeFallback[(unsigned char) mySource[mySourceIndex-1]];
+                  if (fallbackUniChar < 0xfffe)
                  {
-                      myTarget[myTargetIndex++] = targetUniChar;
+                      myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
                  }
              }
-              if (targetUniChar == missingUCharMarker)
+              if (targetUniChar >= 0xfffe)
              {
                  const char *saveSource = args->source;
                  UChar *saveTarget = args->target;
                  int32_t *saveOffsets = args->offsets;
+                  UConverterCallbackReason reason;
+
+                  if (targetUniChar == 0xfffe)
+                  {
+                    reason = UCNV_UNASSIGNED;
+                    *err = U_INVALID_CHAR_FOUND;
+                  }
+                  else
+                  {
+                    reason = UCNV_ILLEGAL;
+                    *err = U_ILLEGAL_CHAR_FOUND;
+                  }

-                  *err = U_INVALID_CHAR_FOUND;
                  args->converter->invalidCharBuffer[0] = (char) mySource[mySourceIndex - 1];
                  args->converter->invalidCharLength = 1;

@ -117,7 +128,7 @@ void T_UConverter_toUnicode_SBCS (UConverterToUnicodeArgs * args,
                                     args,
                                     args->converter->invalidCharBuffer,
                                     args->converter->invalidCharLength, 
-                                     UCNV_UNASSIGNED,
+                                     reason,
                                     err);
                  /* Hsys: calculate the source and target advancement */
                  args->source = saveSource;
@ -238,7 +249,7 @@ void T_UConverter_fromUnicode_SBCS (UConverterFromUnicodeArgs * args,
                                         args->converter->invalidUCharLength,
                                         (UChar32) (args->converter->invalidUCharLength == 2 ? 
                                             UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0], 
-                                                                  args->converter->invalidUCharBuffer[2]) 
+                                                                  args->converter->invalidUCharBuffer[1]) 
                                                    : args->converter->invalidUCharBuffer[0]),
                                         reason,
                                         err);
@ -273,45 +284,52 @@ UChar32 T_UConverter_getNextUChar_SBCS(UConverterToUnicodeArgs* args,
 {
  UChar myUChar;
  
-  if (U_FAILURE(*err)) return 0xFFFD;
+  if (U_FAILURE(*err)) return 0xffff;

  if (args->source+1 > args->sourceLimit) 
    {
      *err = U_INDEX_OUTOFBOUNDS_ERROR;
-      return 0xFFFD;
+      return 0xffff;
    }
  
  /*Gets the corresponding codepoint*/
  myUChar = args->converter->sharedData->table->sbcs.toUnicode[(unsigned char)*(args->source++)];
  
-  if (myUChar != 0xFFFD) return myUChar;
+  if (myUChar < 0xfffe) return myUChar;
  else
    {      
      UChar* myUCharPtr = &myUChar;
-      const char* sourceFinal = args->source;
+      UConverterCallbackReason reason;

      /* Do the fallback stuff */
      if ((args->converter->useFallback == TRUE)&&
          (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
      {
-          myUChar = args->converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*(args->source-1)];
-          if (myUChar != 0xFFFD) return myUChar;
+          UChar fallbackUChar = args->converter->sharedData->table->sbcs.toUnicodeFallback[ (unsigned char)*(args->source-1)];
+          if (fallbackUChar < 0xfffe) return fallbackUChar;
      }

-      *err = U_INVALID_CHAR_FOUND;
-      
-      /*Calls the ErrorFunctor after rewinding the input buffer*/
-      args->source--;
+      if (myUChar == 0xfffe)
+      {
+        reason = UCNV_UNASSIGNED;
+        *err = U_INVALID_CHAR_FOUND;
+      }
+      else
+      {
+        reason = UCNV_ILLEGAL;
+        *err = U_ILLEGAL_CHAR_FOUND;
+      }
+
+      /*Calls the ErrorFunctor */
      /*It's is very likely that the ErrorFunctor will write to the
       *internal buffers */
      args->target = myUCharPtr;
      args->targetLimit = myUCharPtr + 1;
-      args->source = sourceFinal;
      args->converter->fromCharErrorBehaviour(args->converter->toUContext,
                                    args,
-                                    sourceFinal,
+                                    args->source - 1,
                                    1,
-                                    UCNV_UNASSIGNED,
+                                    reason,
                                    err);

      /*makes the internal caching transparent to the user*/
@ -429,7 +447,7 @@ void   T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
              targetUniChar = (UChar) ucmp16_getu (myToUnicode, mySourceChar);

              /*writing the UniChar to the output stream */
-              if (targetUniChar != missingUCharMarker)
+              if (targetUniChar < 0xfffe)
                {
                  /*writes the UniChar to the output stream */
                  myTarget[myTargetIndex++] = targetUniChar;
@ -437,19 +455,30 @@ void   T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
              else if ((args->converter->useFallback == TRUE) &&
                  (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
              {
-                  targetUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
-                  if (targetUniChar != missingUCharMarker)
+                  UChar fallbackUniChar = (UChar) ucmp16_getu(myToUnicodeFallback, mySourceChar);
+                  if (fallbackUniChar < 0xfffe)
                  {
-                      myTarget[myTargetIndex++] = targetUniChar;
+                      myTarget[myTargetIndex++] = targetUniChar = fallbackUniChar;
                  }
              }
-              if (targetUniChar == missingUCharMarker)
+              if (targetUniChar >= 0xfffe)
                {
                  const char *saveSource = args->source;
                  UChar *saveTarget = args->target;
                  int32_t *saveOffsets = args->offsets;
+                  UConverterCallbackReason reason;
+
+                  if (targetUniChar == 0xfffe)
+                  {
+                    reason = UCNV_UNASSIGNED;
+                    *err = U_INVALID_CHAR_FOUND;
+                  }
+                  else
+                  {
+                    reason = UCNV_ILLEGAL;
+                    *err = U_ILLEGAL_CHAR_FOUND;
+                  }

-                  *err = U_INVALID_CHAR_FOUND;
                  args->converter->invalidCharBuffer[0] = (char) (mySourceChar >> 8);
                  args->converter->invalidCharBuffer[1] = (char) mySourceChar;
                  args->converter->invalidCharLength = 2;
@ -462,7 +491,7 @@ void   T_UConverter_toUnicode_DBCS (UConverterToUnicodeArgs * args,
                                     args,
                                     args->converter->invalidCharBuffer,
                                     args->converter->invalidCharLength, 
-                                     UCNV_UNASSIGNED,
+                                     reason,
                                     err);
                  /* Hsys: calculate the source and target advancement */
                  args->source = saveSource;
@ -616,7 +645,7 @@ void   T_UConverter_fromUnicode_DBCS (UConverterFromUnicodeArgs * args,
                                         args->converter->invalidUCharLength,
                                         (UChar32) (args->converter->invalidUCharLength == 2 ? 
                                             UTF16_GET_PAIR_VALUE(args->converter->invalidUCharBuffer[0], 
-                                                                  args->converter->invalidUCharBuffer[2]) 
+                                                                  args->converter->invalidUCharBuffer[1]) 
                                                    : args->converter->invalidUCharBuffer[0]),
                                         reason,
                                         err);
@ -650,7 +679,7 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
 {
  UChar myUChar;
  
-  if (U_FAILURE(*err)) return 0xFFFD;
+  if (U_FAILURE(*err)) return 0xffff;
  /*Checks boundaries and set appropriate error codes*/
  if (args->source+2 > args->sourceLimit) 
    {
@ -665,7 +694,7 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
          *err = U_TRUNCATED_CHAR_FOUND;
        }
      
-      return 0xFFFD;
+      return 0xffff;
    }

  /*Gets the corresponding codepoint*/
@ -674,39 +703,45 @@ UChar32 T_UConverter_getNextUChar_DBCS(UConverterToUnicodeArgs* args,
  
  /*update the input pointer*/
  args->source += 2;
-  if (myUChar != 0xFFFD) return myUChar;
+  if (myUChar < 0xfffe) return myUChar;
  else
    {      
      UChar* myUCharPtr = &myUChar;
-      const char* sourceFinal = args->source;
+      UConverterCallbackReason reason;

-      /* rewinding the input buffer*/
-      args->source -= 2;
      /* Do the fallback stuff */
      if ((args->converter->useFallback == TRUE) &&
          (args->converter->sharedData->staticData->hasToUnicodeFallback == TRUE))
      {
-          myUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicodeFallback),
+          UChar fallbackUChar = ucmp16_getu((&args->converter->sharedData->table->dbcs.toUnicodeFallback),
                            (uint16_t)(((UChar)((*(args->source))) << 8) |((uint8_t)*(args->source-1))));
-          if (myUChar != 0xFFFD) 
+          if (fallbackUChar < 0xfffe)
          {
              args->source += 2;
-              return myUChar;
+              return fallbackUChar;
          }
      }
      
-      *err = U_INVALID_CHAR_FOUND;
-    
+      if (myUChar == 0xfffe)
+      {
+        reason = UCNV_UNASSIGNED;
+        *err = U_INVALID_CHAR_FOUND;
+      }
+      else
+      {
+        reason = UCNV_ILLEGAL;
+        *err = U_ILLEGAL_CHAR_FOUND;
+      }
+
      args->target = myUCharPtr;
      args->targetLimit = myUCharPtr + 1;
-      args->source = sourceFinal;
      /*It's is very likely that the ErrorFunctor will write to the
       *internal buffers */
      args->converter->fromCharErrorBehaviour(args->converter->toUContext,
                                    args,
-                                    sourceFinal,
+                                    args->source - 2,
                                    2,
-                                    UCNV_UNASSIGNED,
+                                    reason,
                                    err);
      /*makes the internal caching transparent to the user*/
      if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
--- a/icu4c/source/test/cintltst/ncnvtst.c
+++ b/icu4c/source/test/cintltst/ncnvtst.c
@ -279,7 +279,7 @@ void TestToUnicodeErrorBehaviour()
  
 }
 void TestGetNextErrorBehaviour(){
-   /*Test for Illegal character*/
+   /*Test for unassigned character*/
    static const char input1[]={ (char)0x70 };
    const char* source=(const char*)input1;
    UErrorCode err=U_ZERO_ERROR;
@ -287,10 +287,11 @@ void TestGetNextErrorBehaviour(){
    UConverter *cnv=ucnv_open("ibm-1159", &err);
    if(U_FAILURE(err)) {
        log_err("Unable to open a SBCS(ibm-1159) converter: %s\n", u_errorName(err));
+        return;
    }
    c=ucnv_getNextUChar(cnv, &source, source+sizeof(source), &err);
-    if(err != U_INVALID_CHAR_FOUND && c!= 0xFFFD){
-        log_err("FAIL: Expected: U_INVALID_CHAR_ERROR ----Got:%s\n Expected 0xFFFD Got %lx\n",  myErrorName(err), c);
+    if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
+        log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n",  myErrorName(err), c);
    }
    ucnv_close(cnv);
     
--- a/icu4c/source/test/cintltst/nucnvtst.c
+++ b/icu4c/source/test/cintltst/nucnvtst.c
@ -1678,13 +1678,13 @@ TestLMBCS() {
         {
              log_err("Unexpected pointer move in 0 byte source request \n");
         }
-         /*0 byte source request - GetNextUChar : error & value == FFFD */
+         /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
         uniChar = ucnv_getNextUChar(cnv, &pLIn, pLIn, &errorCode);
         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
         {
            log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
         }
-         if (uniChar != 0xFFFD) /* would like to use an exported define here */
+         if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
         {
            log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
         }
--- a/icu4c/source/tools/makeconv/makeconv.c
+++ b/icu4c/source/tools/makeconv/makeconv.c
@ -193,7 +193,7 @@ static UDataInfo dataInfo={
    0,

    0x63, 0x6e, 0x76, 0x74,     /* dataFormat="cnvt" */
-    4, 0, 0, 0,                 /* formatVersion -- the new MBCS format needs at least 5.0.0.0 */
+    5, 0, 0, 0,                 /* formatVersion */
    1, 6, 0, 0                  /* dataVersion */
 };

@ -203,7 +203,6 @@ void writeConverterData(UConverterSharedData *mySharedData,
                        const char *cnvDir, 
                        UErrorCode *status)
 {
-  UVersionInfo generalFormatVersion;
  UNewDataMemory *mem = NULL;
  uint32_t sz2;
  
@ -212,16 +211,7 @@ void writeConverterData(UConverterSharedData *mySharedData,
      return;
    }

-  uprv_memcpy(&generalFormatVersion, &dataInfo.formatVersion, sizeof(UVersionInfo));
-  if(mySharedData->staticData->conversionType==UCNV_MBCS && dataInfo.formatVersion[0]<5) {
-    /* adjust the formatVersion for MBCS if necessary */
-    dataInfo.formatVersion[0]=5;
-    dataInfo.formatVersion[1]=0;
-    dataInfo.formatVersion[2]=0;
-    dataInfo.formatVersion[3]=0;
-  }
  mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
-  uprv_memcpy(&dataInfo.formatVersion, &generalFormatVersion, sizeof(UVersionInfo));

  if(U_FAILURE(*status))
    {
@ -639,17 +629,15 @@ UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticDat
  char storageLine[UCNV_MAX_LINE_TEXT];
  char* line = NULL;
  UConverterTable* myUConverterTable = NULL;
-  UChar unicodeValue = 0xFFFF;
+  UChar unicodeValue = 0xfffe;
  int32_t sbcsCodepageValue = 0, fallback = 0;
  UBool seenFallback = FALSE;
  char codepointBytes[5];
-  unsigned char replacementChar = '\0';
  int32_t i = 0;
  CompactByteArray *myFromUnicode = NULL, *myFromUnicodeFallback = NULL;

  
  if (U_FAILURE(*err)) return NULL;
-  replacementChar = myConverter->subChar[0];
  myUConverterTable = (UConverterTable*)uprv_malloc(sizeof(UConverterSBCSTable));

  if (myUConverterTable == NULL) 
@ -725,8 +713,8 @@ UConverterTable *loadSBCSTableFromFile(FileStream* convFile, UConverterStaticDat
  seenFallback = FALSE;
  for (i = 0; i < 256; i++) 
  {
-      if ((myUConverterTable->sbcs.toUnicode[i] == 0xFFFF) &&
-          (myUConverterTable->sbcs.toUnicodeFallback[i] != 0xFFFF))
+      if ((myUConverterTable->sbcs.toUnicode[i] >= 0xfffe) &&
+          (myUConverterTable->sbcs.toUnicodeFallback[i] < 0xfffe))
          
      {
          seenFallback = TRUE;
@ -835,7 +823,7 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
  char storageLine[UCNV_MAX_LINE_TEXT];
  char* line = NULL;
  UConverterTable* myUConverterTable = NULL;
-  UChar unicodeValue = 0xFFFF;
+  UChar unicodeValue = 0xfffe;
  int32_t mbcsCodepageValue = '\0';
  char codepointBytes[6];
  int32_t replacementChar = 0x0000, fallback = 0;
@ -862,7 +850,7 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
  myFromUnicode = &myUConverterTable->dbcs.fromUnicode;
  ucmp16_init(myFromUnicode, (uint16_t)replacementChar);
  myToUnicode = &myUConverterTable->dbcs.toUnicode;
-  ucmp16_init(myToUnicode, (int16_t)0xFFFD);  
+  ucmp16_init(myToUnicode, (int16_t)0xfffe);  

  myFromUnicodeFallback = &myUConverterTable->dbcs.fromUnicodeFallback;
  ucmp16_initBogus(myFromUnicodeFallback);
@ -907,7 +895,7 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
              {
                  myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE;
                  ucmp16_init(myFromUnicodeFallback, (uint16_t)replacementChar);
-                  ucmp16_init(myToUnicodeFallback, (uint16_t)0xFFFD);
+                  ucmp16_init(myToUnicodeFallback, (uint16_t)0xfffe);
              }
              ucmp16_set(myToUnicodeFallback, (int16_t)mbcsCodepageValue, unicodeValue);
              ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)mbcsCodepageValue);
@ -919,8 +907,8 @@ UConverterTable *loadEBCDIC_STATEFULTableFromFile(FileStream* convFile, UConvert
  {
      for (i = 0; i < (uint32_t)ucmp16_getkUnicodeCount(); i++) 
      {
-        if ((ucmp16_getu(myToUnicode, i) == 0xFFFD) &&
-            (ucmp16_getu(myToUnicodeFallback, i) != 0xFFFD))
+        if ((ucmp16_getu(myToUnicode, i) >= 0xfffe) &&
+            (ucmp16_getu(myToUnicodeFallback, i) < 0xfffe))
        {
            seenFallback = TRUE;
            break;
@ -950,7 +938,7 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
  char storageLine[UCNV_MAX_LINE_TEXT];
  char* line = NULL;
  UConverterTable* myUConverterTable = NULL;
-  UChar unicodeValue = 0xFFFD;
+  UChar unicodeValue = 0xfffe;
  int32_t dbcsCodepageValue = '\0';
  char codepointBytes[6];
  int32_t replacementChar = 0x0000, fallback = 0;
@ -976,7 +964,7 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
  myFromUnicode = &(myUConverterTable->dbcs.fromUnicode);
  ucmp16_init(myFromUnicode, (int16_t)replacementChar);
  myToUnicode = &(myUConverterTable->dbcs.toUnicode);
-  ucmp16_init(myToUnicode, (int16_t)0xFFFD);
+  ucmp16_init(myToUnicode, (int16_t)0xfffe);
  
  myFromUnicodeFallback = &(myUConverterTable->dbcs.fromUnicodeFallback);
  ucmp16_initBogus(myFromUnicodeFallback);
@ -1020,7 +1008,7 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
          {
              myConverter->hasFromUnicodeFallback = myConverter->hasToUnicodeFallback = seenFallback = TRUE;
              ucmp16_init(myFromUnicodeFallback, (uint16_t)replacementChar);
-              ucmp16_init(myToUnicodeFallback, (uint16_t)0xFFFD);
+              ucmp16_init(myToUnicodeFallback, (uint16_t)0xfffe);
          }
          ucmp16_set(myToUnicodeFallback, (int16_t)dbcsCodepageValue, unicodeValue);
          ucmp16_set(myFromUnicodeFallback, unicodeValue, (int16_t)dbcsCodepageValue);
@ -1031,8 +1019,8 @@ UConverterTable * loadDBCSTableFromFile(FileStream* convFile, UConverterStaticDa
  {
      for (i = 0; i < (uint32_t)ucmp16_getkUnicodeCount(); i++) 
      {
-        if ((ucmp16_getu(myToUnicode, i) == 0xFFFD) &&
-            (ucmp16_getu(myToUnicodeFallback, i) != 0xFFFD))
+        if ((ucmp16_getu(myToUnicode, i) >= 0xfffe) &&
+            (ucmp16_getu(myToUnicodeFallback, i) < 0xfffe))
        {
            seenFallback = TRUE;
            break;