2017-01-20 00:20:31 +00:00
|
|
|
// © 2016 and later: Unicode, Inc. and others.
|
2016-06-15 18:58:17 +00:00
|
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
2003-06-04 01:06:48 +00:00
|
|
|
/*
|
|
|
|
******************************************************************************
|
|
|
|
*
|
2016-05-31 21:45:07 +00:00
|
|
|
* Copyright (C) 2003, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
2003-06-04 01:06:48 +00:00
|
|
|
*
|
|
|
|
******************************************************************************
|
|
|
|
*/
|
2003-02-28 21:37:55 +00:00
|
|
|
/*
|
|
|
|
punycode.c from draft-ietf-idn-punycode-03
|
|
|
|
http://www.nicemice.net/idn/
|
|
|
|
Adam M. Costello
|
|
|
|
http://www.nicemice.net/amc/
|
|
|
|
|
|
|
|
This is ANSI C code (C89) implementing
|
|
|
|
Punycode (draft-ietf-idn-punycode-03).
|
|
|
|
|
2003-05-06 20:56:38 +00:00
|
|
|
Disclaimer and license
|
|
|
|
|
|
|
|
Regarding this entire document or any portion of it (including
|
|
|
|
the pseudocode and C code), the author makes no guarantees and
|
|
|
|
is not responsible for any damage resulting from its use. The
|
|
|
|
author grants irrevocable permission to anyone to use, modify,
|
|
|
|
and distribute it in any way that does not diminish the rights
|
|
|
|
of anyone else to use, modify, and distribute it, provided that
|
|
|
|
redistributed derivative works do not contain misleading author or
|
|
|
|
version information. Derivative works need not be licensed under
|
|
|
|
similar terms.
|
|
|
|
|
2003-02-28 21:37:55 +00:00
|
|
|
*/
|
|
|
|
#ifndef _PUNYREF_H
|
|
|
|
#define _PUNYREF_H
|
|
|
|
|
|
|
|
/************************************************************/
|
|
|
|
/* Public interface (would normally go in its own .h file): */
|
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
|
2003-05-06 01:22:23 +00:00
|
|
|
#if !UCONFIG_NO_IDNA
|
|
|
|
|
2003-02-28 21:37:55 +00:00
|
|
|
/* Status codes returned by punycode_encode() and punycode_decode().   */
/* punycode_encode() never returns punycode_bad_input; any value other */
/* than punycode_success means the outputs might contain garbage.      */
enum punycode_status {
|
|
|
|
punycode_success, /* Conversion succeeded; the outputs are valid. */
|
|
|
|
punycode_bad_input, /* Input is invalid. */
|
|
|
|
punycode_big_output, /* Output would exceed the space provided. */
|
|
|
|
punycode_overflow /* Input needs wider integers to process. */
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2003-04-25 23:41:23 +00:00
|
|
|
/* Unsigned 32-bit type used by this interface for Unicode code points */
/* and for the input/output length counts.                             */
typedef uint32_t punycode_uint;
|
2003-02-28 21:37:55 +00:00
|
|
|
|
2003-03-20 01:11:51 +00:00
|
|
|
U_CDECL_BEGIN
|
2003-02-28 21:37:55 +00:00
|
|
|
|
2003-03-20 01:11:51 +00:00
|
|
|
/* Converts an array of Unicode code points to Punycode (ASCII).       */
/* Returns punycode_success, punycode_big_output or punycode_overflow. */
/* The full contract is given in the comment that follows this         */
/* prototype.                                                          */
enum punycode_status punycode_encode(
|
2003-02-28 21:37:55 +00:00
|
|
|
punycode_uint input_length, /* in: number of code points in input[] */
|
|
|
|
const punycode_uint input[], /* in: Unicode code points (no surrogate pairs) */
|
|
|
|
const unsigned char case_flags[], /* in: input_length uppercase flags, or a null pointer */
|
|
|
|
punycode_uint *output_length, /* in: capacity of output[]; out: code points written */
|
|
|
|
char output[] ); /* out: ASCII result, not null-terminated */
|
|
|
|
|
|
|
|
/* punycode_encode() converts Unicode to Punycode. The input */
|
|
|
|
/* is represented as an array of Unicode code points (not code */
|
|
|
|
/* units; surrogate pairs are not allowed), and the output */
|
|
|
|
/* will be represented as an array of ASCII code points. The */
|
|
|
|
/* output string is *not* null-terminated; it will contain */
|
|
|
|
/* zeros if and only if the input contains zeros. (Of course */
|
|
|
|
/* the caller can leave room for a terminator and add one if */
|
|
|
|
/* needed.) The input_length is the number of code points in */
|
|
|
|
/* the input. The output_length is an in/out argument: the */
|
|
|
|
/* caller passes in the maximum number of code points that it */
|
|
|
|
/* can receive, and on successful return it will contain the */
|
|
|
|
/* number of code points actually output. The case_flags array */
|
|
|
|
/* holds input_length boolean values, where nonzero suggests that */
|
|
|
|
/* the corresponding Unicode character be forced to uppercase */
|
|
|
|
/* after being decoded (if possible), and zero suggests that */
|
|
|
|
/* it be forced to lowercase (if possible). ASCII code points */
|
|
|
|
/* are encoded literally, except that ASCII letters are forced */
|
|
|
|
/* to uppercase or lowercase according to the corresponding */
|
|
|
|
/* uppercase flags. If case_flags is a null pointer then ASCII */
|
|
|
|
/* letters are left as they are, and other code points are */
|
|
|
|
/* treated as if their uppercase flags were zero. The return */
|
|
|
|
/* value can be any of the punycode_status values defined above */
|
|
|
|
/* except punycode_bad_input; if not punycode_success, then */
|
|
|
|
/* output_length and output might contain garbage. */
|
|
|
|
|
2003-03-20 01:11:51 +00:00
|
|
|
/* Converts Punycode (an array of ASCII code points) to an array of    */
/* Unicode code points. May return any punycode_status value. The full */
/* contract is given in the comment that follows this prototype.       */
enum punycode_status punycode_decode(
|
2003-02-28 21:37:55 +00:00
|
|
|
punycode_uint input_length, /* in: number of code points in input[] */
|
|
|
|
const char input[], /* in: ASCII code points to decode */
|
|
|
|
punycode_uint *output_length, /* in: capacity of output[]; out: code points written */
|
|
|
|
punycode_uint output[], /* out: decoded Unicode code points */
|
|
|
|
unsigned char case_flags[] ); /* out: uppercase flags (room for output_length values), or a null pointer */
|
|
|
|
|
|
|
|
/* punycode_decode() converts Punycode to Unicode. The input is */
|
|
|
|
/* represented as an array of ASCII code points, and the output */
|
|
|
|
/* will be represented as an array of Unicode code points. The */
|
|
|
|
/* input_length is the number of code points in the input. The */
|
|
|
|
/* output_length is an in/out argument: the caller passes in */
|
|
|
|
/* the maximum number of code points that it can receive, and */
|
|
|
|
/* on successful return it will contain the actual number of */
|
|
|
|
/* code points output. The case_flags array needs room for at */
|
|
|
|
/* least output_length values, or it can be a null pointer if the */
|
|
|
|
/* case information is not needed. A nonzero flag suggests that */
|
|
|
|
/* the corresponding Unicode character be forced to uppercase */
|
|
|
|
/* by the caller (if possible), while zero suggests that it be */
|
|
|
|
/* forced to lowercase (if possible). ASCII code points are */
|
|
|
|
/* output already in the proper case, but their flags will be set */
|
|
|
|
/* appropriately so that applying the flags would be harmless. */
|
|
|
|
/* The return value can be any of the punycode_status values */
|
|
|
|
/* defined above; if not punycode_success, then output_length, */
|
|
|
|
/* output, and case_flags might contain garbage. On success, the */
|
|
|
|
/* decoder will never need to write an output_length greater than */
|
|
|
|
/* input_length, because of how the encoding is defined. */
|
2003-03-20 01:11:51 +00:00
|
|
|
U_CDECL_END
|
2003-02-28 21:37:55 +00:00
|
|
|
|
2003-05-06 01:22:23 +00:00
|
|
|
#endif /* #if !UCONFIG_NO_IDNA */
|
|
|
|
|
|
|
|
#endif
|