API Overview API Index Package Overview Direct link to this page
JDK 1.6
  java.net. IDN View Javadoc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451

/*
 * @(#)IDN.java	1.3 05/11/17
 *
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */
package java.net;

import java.io.InputStream;
import java.io.IOException;
import java.security.AccessController;
import java.security.PrivilegedAction;

import sun.net.idn.StringPrep;
import sun.net.idn.Punycode;
import sun.text.normalizer.UCharacterIterator;

/**
 * Provides methods to convert internationalized domain names (IDNs) between
 * a normal Unicode representation and an ASCII Compatible Encoding (ACE) representation.
 * Internationalized domain names can use characters from the entire range of
 * Unicode, while traditional domain names are restricted to ASCII characters.
 * ACE is an encoding of Unicode strings that uses only ASCII characters and
 * can be used with software (such as the Domain Name System) that only
 * understands traditional domain names.
 *
 * <p>Internationalized domain names are defined in <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
 * RFC 3490 defines two operations: ToASCII and ToUnicode. These two operations employ the
 * <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a> algorithm, which is a
 * profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>, and the
 * <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a> algorithm to convert
 * domain name strings back and forth.
 *
 * <p>The behavior of the aforementioned conversion process can be adjusted by various flags:
 *   <ul>
 *     <li>If the ALLOW_UNASSIGNED flag is used, the domain name string to be converted
 *         can contain code points that are unassigned in Unicode 3.2, which is the
 *         Unicode version on which IDN conversion is based. If the flag is not used,
 *         the presence of such unassigned code points is treated as an error.
 *     <li>If the USE_STD3_ASCII_RULES flag is used, ASCII strings are checked against <a href="http://www.ietf.org/rfc/rfc1122.txt">RFC 1122</a> and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC 1123</a>.
 *         It is an error if they don't meet the requirements.
 *   </ul>
 * These flags can be logically OR'ed together.
 *
 * <p>Security considerations are important with respect to internationalized
 * domain name support. For example, English domain names may be <i>homographed</i>
 * - maliciously misspelled by substitution of non-Latin letters.
 * <a href="http://www.unicode.org/reports/tr36/">Unicode Technical Report #36</a>
 * discusses security issues of IDN support as well as possible solutions.
 * Applications are responsible for taking adequate security measures when using
 * international domain names.
 *
 * @version 1.3, 05/11/17
 * @author Edward Wang
 * @since 1.6
 *
 */
public final class IDN {
    /**
     * Flag to allow processing of unassigned code points
     */
    public static final int ALLOW_UNASSIGNED = 0x01;

    /**
     * Flag to turn on the check against STD-3 ASCII rules
     */
    public static final int USE_STD3_ASCII_RULES = 0x02;


    /**
     * Translates a string from Unicode to ASCII Compatible Encoding (ACE),
     * as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
     *
     * <p>ToASCII operation can fail. ToASCII fails if any step of it fails.
     * If ToASCII operation fails, an IllegalArgumentException will be thrown.
     * In this case, the input string should not be used in an internationalized domain name.
     *
     * <p> A label is an individual part of a domain name. The original ToASCII operation,
     * as defined in RFC 3490, only operates on a single label. This method can handle
     * both label and entire domain name, by assuming that labels in a domain name are
     * always separated by dots. The following characters are recognized as dots:
     * &#0092;u002E (full stop), &#0092;u3002 (ideographic full stop), &#0092;uFF0E (fullwidth full stop),
     * and &#0092;uFF61 (halfwidth ideographic full stop). If dots are
     * used as label separators, this method also changes all of them to &#0092;u002E (full stop)
     * in the translated output string. A trailing dot is preserved, and an input that
     * consists of a single dot is translated to <tt>"."</tt>.
     *
     * @param input     the string to be processed
     * @param flag      process flag; can be 0 or any logical OR of possible flags
     *
     * @return          the translated <tt>String</tt>
     *
     * @throws IllegalArgumentException   if the input string doesn't conform to RFC 3490 specification
     */
    public static String toASCII(String input, int flag) {
        // A lone separator is the root label: it has no label to convert, and it
        // must not be rejected as an empty label when STD3 rules are requested.
        if (isRootLabel(input)) {
            return ".";
        }

        StringBuilder out = new StringBuilder(input.length());
        int start = 0;
        while (start < input.length()) {
            int end = searchDots(input, start);
            out.append(toASCIIInternal(input.substring(start, end), flag));
            if (end != input.length()) {
                // a separator was found: more labels follow, or it is a trailing
                // dot that has to be kept
                out.append('.');
            }
            start = end + 1;
        }

        return out.toString();
    }


    /**
     * Translates a string from Unicode to ASCII Compatible Encoding (ACE),
     * as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
     *
     * <p> This convenience method works as if by invoking the
     * two-argument counterpart as follows:
     * <blockquote><tt>
     * {@link #toASCII(String, int) toASCII}(input,&nbsp;0);
     * </tt></blockquote>
     *
     * @param input     the string to be processed
     *
     * @return          the translated <tt>String</tt>
     *
     * @throws IllegalArgumentException   if the input string doesn't conform to RFC 3490 specification
     */
    public static String toASCII(String input) {
        return toASCII(input, 0);
    }


    /**
     * Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
     * as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
     *
     * <p>ToUnicode never fails. In case of any error, the affected label is returned unmodified.
     *
     * <p> A label is an individual part of a domain name. The original ToUnicode operation,
     * as defined in RFC 3490, only operates on a single label. This method can handle
     * both label and entire domain name, by assuming that labels in a domain name are
     * always separated by dots. The following characters are recognized as dots:
     * &#0092;u002E (full stop), &#0092;u3002 (ideographic full stop), &#0092;uFF0E (fullwidth full stop),
     * and &#0092;uFF61 (halfwidth ideographic full stop). Recognized dots are written
     * to the output as &#0092;u002E (full stop); a trailing dot is preserved.
     *
     * @param input     the string to be processed
     * @param flag      process flag; can be 0 or any logical OR of possible flags
     *
     * @return          the translated <tt>String</tt>
     */
    public static String toUnicode(String input, int flag) {
        StringBuilder out = new StringBuilder(input.length());
        int start = 0;
        while (start < input.length()) {
            int end = searchDots(input, start);
            out.append(toUnicodeInternal(input.substring(start, end), flag));
            if (end != input.length()) {
                // a separator was found: more labels follow, or it is a trailing
                // dot that has to be kept
                out.append('.');
            }
            start = end + 1;
        }

        return out.toString();
    }


    /**
     * Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
     * as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
     *
     * <p> This convenience method works as if by invoking the
     * two-argument counterpart as follows:
     * <blockquote><tt>
     * {@link #toUnicode(String, int) toUnicode}(input,&nbsp;0);
     * </tt></blockquote>
     *
     * @param input     the string to be processed
     *
     * @return          the translated <tt>String</tt>
     */
    public static String toUnicode(String input) {
        return toUnicode(input, 0);
    }


    /* ---------------- Private members -------------- */

    // ACE Prefix is "xn--"
    private static final String ACE_PREFIX = "xn--";
    private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length();

    private static final int MAX_LABEL_LENGTH   = 63;

    // resource name of the nameprep profile, resolved relative to StringPrep
    private static final String IDN_PROFILE = "uidna.spp";

    // single instance of nameprep
    private static StringPrep namePrep = null;

    static {
        InputStream stream = null;

        try {
            if (System.getSecurityManager() != null) {
                stream = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
                    public InputStream run() {
                        return StringPrep.class.getResourceAsStream(IDN_PROFILE);
                    }
                });
            } else {
                stream = StringPrep.class.getResourceAsStream(IDN_PROFILE);
            }

            namePrep = new StringPrep(stream);
        } catch (IOException e) {
            // should never reach here
            assert false;
        } finally {
            // release the profile stream on the failure path as well
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // the profile has already been consumed (or loading already
                    // failed above); a failing close() adds nothing actionable
                }
            }
        }
    }


    /* ---------------- Private operations -------------- */


    //
    // to suppress the default zero-argument constructor
    //
    private IDN() {}

    //
    // toASCII operation; should only apply to a single label.
    // Throws IllegalArgumentException if any step fails.
    //
    private static String toASCIIInternal(String label, int flag) {
        // step 1
        // Check if the string contains code points outside the ASCII range 0..0x7F.
        boolean isASCII = isAllASCII(label);
        StringBuffer dest;

        // step 2
        // perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
        if (!isASCII) {
            UCharacterIterator iter = UCharacterIterator.getInstance(label);
            try {
                dest = namePrep.prepare(iter, flag);
            } catch (java.text.ParseException e) {
                throw new IllegalArgumentException(e);
            }
        } else {
            dest = new StringBuffer(label);
        }

        // step 3
        // Verify the absence of non-LDH ASCII code points
        //   0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
        // Verify the absence of leading and trailing hyphen
        if ((flag & USE_STD3_ASCII_RULES) != 0) {
            // An empty label (e.g. from "a..b", or left empty by nameprep) has no
            // first/last character to inspect; report it as a conversion failure
            // rather than letting charAt(0) throw an index exception.
            if (dest.length() == 0) {
                throw new IllegalArgumentException("Empty label is not a legal name");
            }

            for (int i = 0; i < dest.length(); i++) {
                if (!isLDHChar(dest.charAt(i))) {
                    throw new IllegalArgumentException("Contains non-LDH characters");
                }
            }

            if (dest.charAt(0) == '-' || dest.charAt(dest.length() - 1) == '-') {
                throw new IllegalArgumentException("Has leading or trailing hyphen");
            }
        }

        // step 4
        // If all code points are inside 0..0x7f, skip to step 8
        if (!isASCII && !isAllASCII(dest.toString())) {
            // step 5
            // verify the sequence does not begin with ACE prefix
            if (startsWithACEPrefix(dest)) {
                throw new IllegalArgumentException("The input starts with the ACE Prefix");
            }

            // step 6
            // encode the sequence with punycode
            try {
                dest = Punycode.encode(dest, null);
            } catch (java.text.ParseException e) {
                throw new IllegalArgumentException(e);
            }

            dest = toASCIILower(dest);

            // step 7
            // prepend the ACE prefix
            dest.insert(0, ACE_PREFIX);
        }

        // step 8
        // the label must not be longer than 63 characters
        if (dest.length() > MAX_LABEL_LENGTH) {
            throw new IllegalArgumentException("The label in the input is too long");
        }

        return dest.toString();
    }

    //
    // toUnicode operation; should only apply to a single label.
    // Never fails: whenever a step fails the input label is returned as is.
    //
    private static String toUnicodeInternal(String label, int flag) {
        StringBuffer dest;

        // step 1
        // find out if all the codepoints in input are ASCII
        if (isAllASCII(label)) {
            dest = new StringBuffer(label);
        } else {
            // step 2
            // perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
            try {
                UCharacterIterator iter = UCharacterIterator.getInstance(label);
                dest = namePrep.prepare(iter, flag);
            } catch (Exception e) {
                // toUnicode never fails; if any step fails, return the input string
                return label;
            }
        }

        // step 3
        // verify ACE Prefix
        if (startsWithACEPrefix(dest)) {

            // step 4
            // Remove the ACE Prefix
            String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());

            try {
                // step 5
                // Decode using punycode
                StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);

                // step 6
                // Apply toASCII
                String toASCIIOut = toASCII(decodeOut.toString(), flag);

                // step 7
                // verify that re-encoding yields the label we started from
                if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
                    // step 8
                    // return output of step 5
                    return decodeOut.toString();
                }
            } catch (Exception ignored) {
                // deliberate: toUnicode never fails, fall through and
                // return the input label
            }
        }

        // just return the input
        return label;
    }


    //
    // LDH stands for "letter/digit/hyphen", with characters restricted to the
    // 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen
    // <->
    // non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7F
    //
    private static boolean isLDHChar(int ch) {
        return ch == '-'
            || ('0' <= ch && ch <= '9')
            || ('A' <= ch && ch <= 'Z')
            || ('a' <= ch && ch <= 'z');
    }


    //
    // to check if a character is one of the recognized label separators:
    // U+002E (full stop), U+3002 (ideographic full stop),
    // U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full stop)
    //
    private static boolean isLabelSeparator(char c) {
        return c == '.' || c == '\u3002' || c == '\uFF0E' || c == '\uFF61';
    }


    //
    // to check if a string is the root label, i.e. exactly one label separator
    //
    private static boolean isRootLabel(String s) {
        return s.length() == 1 && isLabelSeparator(s.charAt(0));
    }


    //
    // search dots in a string, beginning at index start, and return the index
    // of the first one found; or if there is no dot, return the length of the
    // input string
    //
    private static int searchDots(String s, int start) {
        int i = start;
        while (i < s.length() && !isLabelSeparator(s.charAt(i))) {
            i++;
        }
        return i;
    }


    //
    // to check if a string only contains US-ASCII code point
    //
    private static boolean isAllASCII(String input) {
        for (int i = 0; i < input.length(); i++) {
            if (input.charAt(i) > 0x7F) {
                return false;
            }
        }
        return true;
    }

    //
    // to check if a string starts with ACE-prefix (ASCII case-insensitively)
    //
    private static boolean startsWithACEPrefix(StringBuffer input) {
        if (input.length() < ACE_PREFIX_LENGTH) {
            return false;
        }
        for (int i = 0; i < ACE_PREFIX_LENGTH; i++) {
            if (toASCIILower(input.charAt(i)) != ACE_PREFIX.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    //
    // lower-case 'A'..'Z' only; every other character is returned unchanged
    //
    private static char toASCIILower(char ch) {
        if ('A' <= ch && ch <= 'Z') {
            return (char) (ch + 'a' - 'A');
        }
        return ch;
    }

    //
    // return a new buffer holding the input with 'A'..'Z' lower-cased
    //
    private static StringBuffer toASCIILower(StringBuffer input) {
        StringBuffer dest = new StringBuffer(input.length());
        for (int i = 0; i < input.length(); i++) {
            dest.append(toASCIILower(input.charAt(i)));
        }
        return dest;
    }
}

Generated By: JavaOnTracks Doclet 0.1.4     ©Thibaut Colar