// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  idna.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010mar05
*   created by: Markus W. Scherer
*/

#ifndef __IDNA_H__
|
||
|
#define __IDNA_H__
|
||
|
|
||
|
/**
 * \file
 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
 */

#include "unicode/utypes.h"
|
||
|
|
||
|
#if U_SHOW_CPLUSPLUS_API
|
||
|
|
||
|
#if !UCONFIG_NO_IDNA
|
||
|
|
||
|
#include "unicode/bytestream.h"
|
||
|
#include "unicode/stringpiece.h"
|
||
|
#include "unicode/uidna.h"
|
||
|
#include "unicode/unistr.h"
|
||
|
|
||
|
U_NAMESPACE_BEGIN
|
||
|
|
||
|
// Forward declaration: IDNAInfo is defined further down in this header and is
// taken by reference by the IDNA processing methods declared next.
class IDNAInfo;
|
||
|
|
||
|
/**
|
||
|
* Abstract base class for IDNA processing.
|
||
|
* See http://www.unicode.org/reports/tr46/
|
||
|
* and http://www.ietf.org/rfc/rfc3490.txt
|
||
|
*
|
||
|
* The IDNA class is not intended for public subclassing.
|
||
|
*
|
||
|
* This C++ API currently only implements UTS #46.
|
||
|
* The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
|
||
|
* and IDNA2003 (functions that do not use a service object).
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
class U_COMMON_API IDNA : public UObject {
|
||
|
public:
|
||
|
/**
|
||
|
* Destructor.
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
~IDNA();
|
||
|
|
||
|
/**
|
||
|
* Returns an IDNA instance which implements UTS #46.
|
||
|
* Returns an unmodifiable instance, owned by the caller.
|
||
|
* Cache it for multiple operations, and delete it when done.
|
||
|
* The instance is thread-safe, that is, it can be used concurrently.
|
||
|
*
|
||
|
* UTS #46 defines Unicode IDNA Compatibility Processing,
|
||
|
* updated to the latest version of Unicode and compatible with both
|
||
|
* IDNA2003 and IDNA2008.
|
||
|
*
|
||
|
* The worker functions use transitional processing, including deviation mappings,
|
||
|
* unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
|
||
|
* is used in which case the deviation characters are passed through without change.
|
||
|
*
|
||
|
* Disallowed characters are mapped to U+FFFD.
|
||
|
*
|
||
|
* For available options see the uidna.h header.
|
||
|
* Operations with the UTS #46 instance do not support the
|
||
|
* UIDNA_ALLOW_UNASSIGNED option.
|
||
|
*
|
||
|
* By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
|
||
|
* When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
|
||
|
* letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
|
||
|
*
|
||
|
* @param options Bit set to modify the processing and error checking.
|
||
|
* See option bit set values in uidna.h.
|
||
|
* @param errorCode Standard ICU error code. Its input value must
|
||
|
* pass the U_SUCCESS() test, or else the function returns
|
||
|
* immediately. Check for U_FAILURE() on output or use with
|
||
|
* function chaining. (See User Guide for details.)
|
||
|
* @return the UTS #46 IDNA instance, if successful
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
static IDNA *
|
||
|
createUTS46Instance(uint32_t options, UErrorCode &errorCode);
|
||
|
|
||
|
/**
|
||
|
* Converts a single domain name label into its ASCII form for DNS lookup.
|
||
|
* If any processing step fails, then info.hasErrors() will be true and
|
||
|
* the result might not be an ASCII string.
|
||
|
* The label might be modified according to the types of errors.
|
||
|
* Labels with severe errors will be left in (or turned into) their Unicode form.
|
||
|
*
|
||
|
* The UErrorCode indicates an error only in exceptional cases,
|
||
|
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||
|
*
|
||
|
* @param label Input domain name label
|
||
|
* @param dest Destination string object
|
||
|
* @param info Output container of IDNA processing details.
|
||
|
* @param errorCode Standard ICU error code. Its input value must
|
||
|
* pass the U_SUCCESS() test, or else the function returns
|
||
|
* immediately. Check for U_FAILURE() on output or use with
|
||
|
* function chaining. (See User Guide for details.)
|
||
|
* @return dest
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
virtual UnicodeString &
|
||
|
labelToASCII(const UnicodeString &label, UnicodeString &dest,
|
||
|
IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
||
|
|
||
|
/**
|
||
|
* Converts a single domain name label into its Unicode form for human-readable display.
|
||
|
* If any processing step fails, then info.hasErrors() will be true.
|
||
|
* The label might be modified according to the types of errors.
|
||
|
*
|
||
|
* The UErrorCode indicates an error only in exceptional cases,
|
||
|
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||
|
*
|
||
|
* @param label Input domain name label
|
||
|
* @param dest Destination string object
|
||
|
* @param info Output container of IDNA processing details.
|
||
|
* @param errorCode Standard ICU error code. Its input value must
|
||
|
* pass the U_SUCCESS() test, or else the function returns
|
||
|
* immediately. Check for U_FAILURE() on output or use with
|
||
|
* function chaining. (See User Guide for details.)
|
||
|
* @return dest
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
virtual UnicodeString &
|
||
|
labelToUnicode(const UnicodeString &label, UnicodeString &dest,
|
||
|
IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
||
|
|
||
|
/**
|
||
|
* Converts a whole domain name into its ASCII form for DNS lookup.
|
||
|
* If any processing step fails, then info.hasErrors() will be true and
|
||
|
* the result might not be an ASCII string.
|
||
|
* The domain name might be modified according to the types of errors.
|
||
|
* Labels with severe errors will be left in (or turned into) their Unicode form.
|
||
|
*
|
||
|
* The UErrorCode indicates an error only in exceptional cases,
|
||
|
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||
|
*
|
||
|
* @param name Input domain name
|
||
|
* @param dest Destination string object
|
||
|
* @param info Output container of IDNA processing details.
|
||
|
* @param errorCode Standard ICU error code. Its input value must
|
||
|
* pass the U_SUCCESS() test, or else the function returns
|
||
|
* immediately. Check for U_FAILURE() on output or use with
|
||
|
* function chaining. (See User Guide for details.)
|
||
|
* @return dest
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
virtual UnicodeString &
|
||
|
nameToASCII(const UnicodeString &name, UnicodeString &dest,
|
||
|
IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
||
|
|
||
|
/**
|
||
|
* Converts a whole domain name into its Unicode form for human-readable display.
|
||
|
* If any processing step fails, then info.hasErrors() will be true.
|
||
|
* The domain name might be modified according to the types of errors.
|
||
|
*
|
||
|
* The UErrorCode indicates an error only in exceptional cases,
|
||
|
* such as a U_MEMORY_ALLOCATION_ERROR.
|
||
|
*
|
||
|
* @param name Input domain name
|
||
|
* @param dest Destination string object
|
||
|
* @param info Output container of IDNA processing details.
|
||
|
* @param errorCode Standard ICU error code. Its input value must
|
||
|
* pass the U_SUCCESS() test, or else the function returns
|
||
|
* immediately. Check for U_FAILURE() on output or use with
|
||
|
* function chaining. (See User Guide for details.)
|
||
|
* @return dest
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
virtual UnicodeString &
|
||
|
nameToUnicode(const UnicodeString &name, UnicodeString &dest,
|
||
|
IDNAInfo &info, UErrorCode &errorCode) const = 0;
|
||
|
|
||
|
// UTF-8 versions of the processing methods ---------------------------- ***
|
||
|
|
||
|
/**
|
||
|
* Converts a single domain name label into its ASCII form for DNS lookup.
|
||
|
* UTF-8 version of labelToASCII(), same behavior.
|
||
|
*
|
||
|
* @param label Input domain name label
|
||
|
* @param dest Destination byte sink; Flush()ed if successful
|
||
|
* @param info Output container of IDNA processing details.
|
||
|
* @param errorCode Standard ICU error code. Its input value must
|
||
|
* pass the U_SUCCESS() test, or else the function returns
|
||
|
* immediately. Check for U_FAILURE() on output or use with
|
||
|
* function chaining. (See User Guide for details.)
|
||
|
* @return dest
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
virtual void
|
||
|
labelToASCII_UTF8(StringPiece label, ByteSink &dest,
|
||
|
IDNAInfo &info, UErrorCode &errorCode) const;
|
||
|
|
||
|
/**
|
||
|
* Converts a single domain name label into its Unicode form for human-readable display.
|
||
|
* UTF-8 version of labelToUnicode(), same behavior.
|
||
|
*
|
||
|
* @param label Input domain name label
|
||
|
* @param dest Destination byte sink; Flush()ed if successful
|
||
|
* @param info Output container of IDNA processing details.
|
||
|
* @param errorCode Standard ICU error code. Its input value must
|
||
|
* pass the U_SUCCESS() test, or else the function returns
|
||
|
* immediately. Check for U_FAILURE() on output or use with
|
||
|
* function chaining. (See User Guide for details.)
|
||
|
* @return dest
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
virtual void
|
||
|
labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
|
||
|
IDNAInfo &info, UErrorCode &errorCode) const;
|
||
|
|
||
|
/**
|
||
|
* Converts a whole domain name into its ASCII form for DNS lookup.
|
||
|
* UTF-8 version of nameToASCII(), same behavior.
|
||
|
*
|
||
|
* @param name Input domain name
|
||
|
* @param dest Destination byte sink; Flush()ed if successful
|
||
|
* @param info Output container of IDNA processing details.
|
||
|
* @param errorCode Standard ICU error code. Its input value must
|
||
|
* pass the U_SUCCESS() test, or else the function returns
|
||
|
* immediately. Check for U_FAILURE() on output or use with
|
||
|
* function chaining. (See User Guide for details.)
|
||
|
* @return dest
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
virtual void
|
||
|
nameToASCII_UTF8(StringPiece name, ByteSink &dest,
|
||
|
IDNAInfo &info, UErrorCode &errorCode) const;
|
||
|
|
||
|
/**
|
||
|
* Converts a whole domain name into its Unicode form for human-readable display.
|
||
|
* UTF-8 version of nameToUnicode(), same behavior.
|
||
|
*
|
||
|
* @param name Input domain name
|
||
|
* @param dest Destination byte sink; Flush()ed if successful
|
||
|
* @param info Output container of IDNA processing details.
|
||
|
* @param errorCode Standard ICU error code. Its input value must
|
||
|
* pass the U_SUCCESS() test, or else the function returns
|
||
|
* immediately. Check for U_FAILURE() on output or use with
|
||
|
* function chaining. (See User Guide for details.)
|
||
|
* @return dest
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
virtual void
|
||
|
nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
|
||
|
IDNAInfo &info, UErrorCode &errorCode) const;
|
||
|
};
|
||
|
|
||
|
// Forward declaration: UTS46 is named as a friend of IDNAInfo below; its
// definition is not visible in this header.
class UTS46;
|
||
|
|
||
|
/**
|
||
|
* Output container for IDNA processing errors.
|
||
|
* The IDNAInfo class is not suitable for subclassing.
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
class U_COMMON_API IDNAInfo : public UMemory {
|
||
|
public:
|
||
|
/**
|
||
|
* Constructor for stack allocation.
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {}
|
||
|
/**
|
||
|
* Were there IDNA processing errors?
|
||
|
* @return true if there were processing errors
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
UBool hasErrors() const { return errors!=0; }
|
||
|
/**
|
||
|
* Returns a bit set indicating IDNA processing errors.
|
||
|
* See UIDNA_ERROR_... constants in uidna.h.
|
||
|
* @return bit set of processing errors
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
uint32_t getErrors() const { return errors; }
|
||
|
/**
|
||
|
* Returns true if transitional and nontransitional processing produce different results.
|
||
|
* This is the case when the input label or domain name contains
|
||
|
* one or more deviation characters outside a Punycode label (see UTS #46).
|
||
|
* <ul>
|
||
|
* <li>With nontransitional processing, such characters are
|
||
|
* copied to the destination string.
|
||
|
* <li>With transitional processing, such characters are
|
||
|
* mapped (sharp s/sigma) or removed (joiner/nonjoiner).
|
||
|
* </ul>
|
||
|
* @return true if transitional and nontransitional processing produce different results
|
||
|
* @stable ICU 4.6
|
||
|
*/
|
||
|
UBool isTransitionalDifferent() const { return isTransDiff; }
|
||
|
|
||
|
private:
|
||
|
friend class UTS46;
|
||
|
|
||
|
IDNAInfo(const IDNAInfo &other); // no copying
|
||
|
IDNAInfo &operator=(const IDNAInfo &other); // no copying
|
||
|
|
||
|
void reset() {
|
||
|
errors=labelErrors=0;
|
||
|
isTransDiff=false;
|
||
|
isBiDi=false;
|
||
|
isOkBiDi=true;
|
||
|
}
|
||
|
|
||
|
uint32_t errors, labelErrors;
|
||
|
UBool isTransDiff;
|
||
|
UBool isBiDi;
|
||
|
UBool isOkBiDi;
|
||
|
};
|
||
|
|
||
|
U_NAMESPACE_END
|
||
|
|
||
|
#endif // UCONFIG_NO_IDNA
|
||
|
|
||
|
#endif /* U_SHOW_CPLUSPLUS_API */
|
||
|
|
||
|
#endif // __IDNA_H__
|