001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 * 019 */ 020package org.apache.directory.api.ldap.model.schema; 021 022 023import java.text.Normalizer; 024 025import org.apache.directory.api.util.Strings; 026import org.apache.directory.api.util.exception.InvalidCharacterException; 027 028 029/** 030 * 031 * This non-instantiable utility class implements the 6 string preparation steps described in RFC 4518 032 * 033 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a> 034 */ 035public final class PrepareString 036{ 037 /** The states of a normalization state machine. NOTE(review): the code driving these states is not visible here - confirm their exact meaning */ private enum NormStateEnum 038 { 039 START, 040 INITIAL_CHAR, 041 INITIAL_SPACES, 042 SPACES, 043 CHARS, 044 SPACE_CHAR, 045 END 046 } 047 048 /** Flag value selecting a case sensitive map process : casing is presumably kept (TODO confirm against the methods taking this flag) */ 049 public static final boolean CASE_SENSITIVE = true; 050 051 /** Flag value selecting a case insensitive map process : chars are presumably lowercased (TODO confirm against the methods taking this flag) */ 052 public static final boolean IGNORE_CASE = false; 053 054 /** 055 * The kind of value we have to normalize : one of the three parts of a substring assertion, or an attribute value 056 */ 057 public enum AssertionType 058 { 059 /** The INITIAL part of a substring assertion value */ 060 SUBSTRING_INITIAL, 061 062 /** The ANY part of a substring assertion value */ 063 SUBSTRING_ANY, 064 065 /** The FINAL part of a substring 
assertion value */ 066 SUBSTRING_FINAL, 067 068 /** An Attribute Value */ 069 ATTRIBUTE_VALUE 070 } 071 072 /** A single preallocated exception instance, used to get out of the map method quickly. NOTE(review): the places throwing it are not visible here - confirm */ 073 private static final ArrayIndexOutOfBoundsException AIOOBE = new ArrayIndexOutOfBoundsException(); 074 075 /** 076 * A private constructor, to avoid instance creation of this static class. 077 */ 078 private PrepareString() 079 { 080 // Do nothing 081 } 082 083 084 /** 085 * The first step defined by RFC 4518 : Transcode, which transforms a 086 * UTF-8 encoded byte array into a Unicode String. The decoding is delegated to the {@link Strings#utf8ToString} 087 * method. 088 * 089 * @param bytes The UTF-8 encoded byte[] to transcode 090 * @return The transcoded String, as returned by {@link Strings#utf8ToString} 091 */ 092 public static String transcode( byte[] bytes ) 093 { 094 return Strings.utf8ToString( bytes ); 095 } 096 097 098 /** 099 * Normalize a String to the Unicode NFKC form, using {@link Normalizer}. A String which is already in NFKC form is returned as is. 100 * 101 * @param value the value to normalize (a null value is rejected by {@link Normalizer#isNormalized}) 102 * @return The NFKC form of the value, or the value itself when it is already normalized 103 */ 104 public static String normalize( String value ) 105 { 106 if ( !Normalizer.isNormalized( value, Normalizer.Form.NFKC ) ) 107 { 108 return Normalizer.normalize( value, Normalizer.Form.NFKC ); 109 } 110 else 111 { 112 return value; 113 } 114 } 115 116 117 /** 118 * Apply the RFC 4518 MAP transformation, case sensitive 119 * 120 * @param unicode The original String 121 * @return The mapped String 122 */ 123 public static String mapCaseSensitive( String unicode ) 124 { 125 try 126 { 127 return mapCaseSensitiveAscii( unicode ); 128 } 129 catch ( ArrayIndexOutOfBoundsException aioobe ) 130 { 131 // There 132 } 133 134 char[] source = unicode.toCharArray(); 135 136 // Create a target char array which is 3 times bigger than the original size. 137 // We have to do that because the map phase may transform a char to 138 // three chars. 139 // TODO : we have to find a way to prevent this waste of space. 
140 char[] target = new char[unicode.length() * 3 + 2]; 141 142 int limit = 0; 143 144 for ( char c : source ) 145 { 146 switch ( c ) 147 { 148 case 0x0000: 149 case 0x0001: 150 case 0x0002: 151 case 0x0003: 152 case 0x0004: 153 case 0x0005: 154 case 0x0006: 155 case 0x0007: 156 case 0x0008: 157 // All other control code (e.g., Cc) points or code points with a 158 // control function (e.g., Cf) are mapped to nothing. The following is 159 // a complete list of these code points: U+0000-0008... 160 break; 161 162 case 0x0009: 163 case 0x000A: 164 case 0x000B: 165 case 0x000C: 166 case 0x000D: 167 // CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE 168 // TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR) 169 // (U+000D), ... are mapped to SPACE (U+0020). 170 target[limit++] = 0x0020; 171 break; 172 173 case 0x000E: 174 case 0x000F: 175 case 0x0010: 176 case 0x0011: 177 case 0x0012: 178 case 0x0013: 179 case 0x0014: 180 case 0x0015: 181 case 0x0016: 182 case 0x0017: 183 case 0x0018: 184 case 0x0019: 185 case 0x001A: 186 case 0x001B: 187 case 0x001C: 188 case 0x001D: 189 case 0x001E: 190 case 0x001F: 191 // All other control code (e.g., Cc) points or code points with a 192 // control function (e.g., Cf) are mapped to nothing. The following is 193 // a complete list of these code points: ... U+000E-001F... 
194 break; 195 196 case 0x0041: 197 case 0x0042: 198 case 0x0043: 199 case 0x0044: 200 case 0x0045: 201 case 0x0046: 202 case 0x0047: 203 case 0x0048: 204 case 0x0049: 205 case 0x004A: 206 case 0x004B: 207 case 0x004C: 208 case 0x004D: 209 case 0x004E: 210 case 0x004F: 211 case 0x0050: 212 case 0x0051: 213 case 0x0052: 214 case 0x0053: 215 case 0x0054: 216 case 0x0055: 217 case 0x0056: 218 case 0x0057: 219 case 0x0058: 220 case 0x0059: 221 case 0x005A: 222 // For case ignore, numeric, and stored prefix string matching rules, 223 // characters are case folded per B.2 of [RFC3454] : U+0041-005A 224 target[limit++] = c; 225 break; 226 227 case 0x007F: 228 case 0x0080: 229 case 0x0081: 230 case 0x0082: 231 case 0x0083: 232 case 0x0084: 233 // All other control code (e.g., Cc) points or code points with a 234 // control function (e.g., Cf) are mapped to nothing. The following is 235 // a complete list of these code points: ... U+007F-0084... 236 break; 237 238 case 0x0085: 239 // ... and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020). 240 target[limit++] = 0x0020; 241 break; 242 243 case 0x0086: 244 case 0x0087: 245 case 0x0088: 246 case 0x0089: 247 case 0x008A: 248 case 0x008B: 249 case 0x008C: 250 case 0x008D: 251 case 0x008E: 252 case 0x008F: 253 case 0x0090: 254 case 0x0091: 255 case 0x0092: 256 case 0x0093: 257 case 0x0094: 258 case 0x0095: 259 case 0x0096: 260 case 0x0097: 261 case 0x0098: 262 case 0x0099: 263 case 0x009A: 264 case 0x009B: 265 case 0x009C: 266 case 0x009D: 267 case 0x009E: 268 case 0x009F: 269 // All other control code (e.g., Cc) points or code points with a 270 // control function (e.g., Cf) are mapped to nothing. The following is 271 // a complete list of these code points: ... U+0086-009F... 272 break; 273 274 case 0x00A0: 275 // All other code points with Separator (space, line, or paragraph) property 276 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 277 // list of these code points: ... 00A0 ... 
278 target[limit++] = 0x0020; 279 break; 280 281 case 0x00AD: 282 // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 283 // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and 284 // VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also 285 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 286 // mapped to nothing. 287 break; 288 289 case 0x00B5: 290 // For case ignore, numeric, and stored prefix string matching rules, 291 // characters are case folded per B.2 of [RFC3454] : U+00B5 292 target[limit++] = 0x03BC; 293 break; 294 295 case 0x00C0: 296 case 0x00C1: 297 case 0x00C2: 298 case 0x00C3: 299 case 0x00C4: 300 case 0x00C5: 301 case 0x00C6: 302 case 0x00C7: 303 case 0x00C8: 304 case 0x00C9: 305 case 0x00CA: 306 case 0x00CB: 307 case 0x00CC: 308 case 0x00CD: 309 case 0x00CE: 310 case 0x00CF: 311 case 0x00D0: 312 case 0x00D1: 313 case 0x00D2: 314 case 0x00D3: 315 case 0x00D4: 316 case 0x00D5: 317 // no 0x00D7 318 case 0x00D6: 319 case 0x00D8: 320 case 0x00D9: 321 case 0x00DA: 322 case 0x00DB: 323 case 0x00DC: 324 case 0x00DD: 325 case 0x00DE: 326 // For case ignore, numeric, and stored prefix string matching rules, 327 // characters are case folded per B.2 of [RFC3454] : U+00C0-00D6, 328 // U+00D8-00DE 329 target[limit++] = c; 330 break; 331 332 case 0x00DF: 333 // For case ignore, numeric, and stored prefix string matching rules, 334 // characters are case folded per B.2 of [RFC3454] : U+00DF 335 target[limit++] = 0x0073; 336 target[limit++] = 0x0073; 337 break; 338 339 case 0x0100: 340 case 0x0102: 341 case 0x0104: 342 case 0x0106: 343 case 0x0108: 344 case 0x010A: 345 case 0x010C: 346 case 0x010E: 347 case 0x0110: 348 case 0x0112: 349 case 0x0114: 350 case 0x0116: 351 case 0x0118: 352 case 0x011A: 353 case 0x011C: 354 case 0x011E: 355 case 0x0120: 356 case 0x0122: 357 case 0x0124: 358 case 0x0126: 359 case 0x0128: 360 case 0x012A: 361 case 0x012C: 362 case 0x012E: 363 // For case ignore, numeric, 
and stored prefix string matching rules, 364 // characters are case folded per B.2 of [RFC3454] : U+0100-012E 365 target[limit++] = ( char ) ( c + 0x0001 ); 366 break; 367 368 case 0x0130: 369 // For case ignore, numeric, and stored prefix string matching rules, 370 // characters are case folded per B.2 of [RFC3454] : U+0130 371 target[limit++] = 0x0069; 372 target[limit++] = 0x0307; 373 break; 374 375 case 0x0132: 376 case 0x0134: 377 case 0x0136: 378 case 0x0139: 379 case 0x013B: 380 case 0x013D: 381 case 0x013F: 382 case 0x0141: 383 case 0x0143: 384 case 0x0145: 385 case 0x0147: 386 // For case ignore, numeric, and stored prefix string matching rules, 387 // characters are case folded per B.2 of [RFC3454] : U+0132-0147 388 target[limit++] = ( char ) ( c + 0x0001 ); 389 break; 390 391 case 0x0149: 392 // For case ignore, numeric, and stored prefix string matching rules, 393 // characters are case folded per B.2 of [RFC3454] : U+0149 394 target[limit++] = 0x02BC; 395 target[limit++] = 0x006E; 396 break; 397 398 case 0x014A: 399 case 0x014C: 400 case 0x014E: 401 case 0x0150: 402 case 0x0152: 403 case 0x0154: 404 case 0x0156: 405 case 0x0158: 406 case 0x015A: 407 case 0x015C: 408 case 0x015E: 409 case 0x0160: 410 case 0x0162: 411 case 0x0164: 412 case 0x0166: 413 case 0x0168: 414 case 0x016A: 415 case 0x016C: 416 case 0x016E: 417 case 0x0170: 418 case 0x0172: 419 case 0x0174: 420 case 0x0176: 421 // For case ignore, numeric, and stored prefix string matching rules, 422 // characters are case folded per B.2 of [RFC3454] : U+0141-0176 423 target[limit++] = ( char ) ( c + 0x0001 ); 424 break; 425 426 case 0x0178: 427 // For case ignore, numeric, and stored prefix string matching rules, 428 // characters are case folded per B.2 of [RFC3454] : U+0178 429 target[limit++] = 0x00FF; 430 break; 431 432 case 0x0179: 433 case 0x017B: 434 case 0x017D: 435 // For case ignore, numeric, and stored prefix string matching rules, 436 // characters are case folded per B.2 of [RFC3454] 
: U+0179-017D 437 target[limit++] = ( char ) ( c + 0x0001 ); 438 break; 439 440 case 0x017F: 441 // For case ignore, numeric, and stored prefix string matching rules, 442 // characters are case folded per B.2 of [RFC3454] : U+017F 443 target[limit++] = 0x0073; 444 break; 445 446 case 0x0181: 447 // For case ignore, numeric, and stored prefix string matching rules, 448 // characters are case folded per B.2 of [RFC3454] : U+0181 449 target[limit++] = 0x0253; 450 break; 451 452 case 0x0182: 453 case 0x0184: 454 // For case ignore, numeric, and stored prefix string matching rules, 455 // characters are case folded per B.2 of [RFC3454] : U+0182, U+0x0184 456 target[limit++] = ( char ) ( c + 0x0001 ); 457 break; 458 459 case 0x0186: 460 // For case ignore, numeric, and stored prefix string matching rules, 461 // characters are case folded per B.2 of [RFC3454] : U+0186 462 target[limit++] = 0x0254; 463 break; 464 465 case 0x0187: 466 // For case ignore, numeric, and stored prefix string matching rules, 467 // characters are case folded per B.2 of [RFC3454] : U+0188 468 target[limit++] = 0x0188; 469 break; 470 471 case 0x0189: 472 case 0x018A: 473 // For case ignore, numeric, and stored prefix string matching rules, 474 // characters are case folded per B.2 of [RFC3454] : U+0189, U+018A 475 target[limit++] = ( char ) ( c + 0x00CD ); 476 break; 477 478 case 0x018B: 479 // For case ignore, numeric, and stored prefix string matching rules, 480 // characters are case folded per B.2 of [RFC3454] : U+018B 481 target[limit++] = 0x018C; 482 break; 483 484 case 0x018E: 485 // For case ignore, numeric, and stored prefix string matching rules, 486 // characters are case folded per B.2 of [RFC3454] : U+018E 487 target[limit++] = 0x01DD; 488 break; 489 490 case 0x018F: 491 // For case ignore, numeric, and stored prefix string matching rules, 492 // characters are case folded per B.2 of [RFC3454] : U+018F 493 target[limit++] = 0x0259; 494 break; 495 496 case 0x0190: 497 // For case 
ignore, numeric, and stored prefix string matching rules, 498 // characters are case folded per B.2 of [RFC3454] : U+0190 499 target[limit++] = 0x025B; 500 break; 501 502 case 0x0191: 503 // For case ignore, numeric, and stored prefix string matching rules, 504 // characters are case folded per B.2 of [RFC3454] : U+0191 505 target[limit++] = 0x0192; 506 break; 507 508 case 0x0193: 509 // For case ignore, numeric, and stored prefix string matching rules, 510 // characters are case folded per B.2 of [RFC3454] : U+0193 511 target[limit++] = 0x0260; 512 break; 513 514 case 0x0194: 515 // For case ignore, numeric, and stored prefix string matching rules, 516 // characters are case folded per B.2 of [RFC3454] : U+0194 517 target[limit++] = 0x0263; 518 break; 519 520 case 0x0196: 521 // For case ignore, numeric, and stored prefix string matching rules, 522 // characters are case folded per B.2 of [RFC3454] : U+0196 523 target[limit++] = 0x0269; 524 break; 525 526 case 0x0197: 527 // For case ignore, numeric, and stored prefix string matching rules, 528 // characters are case folded per B.2 of [RFC3454] : U+0197 529 target[limit++] = 0x0268; 530 break; 531 532 case 0x0198: 533 // For case ignore, numeric, and stored prefix string matching rules, 534 // characters are case folded per B.2 of [RFC3454] : U+0198 535 target[limit++] = 0x0199; 536 break; 537 538 case 0x019C: 539 // For case ignore, numeric, and stored prefix string matching rules, 540 // characters are case folded per B.2 of [RFC3454] : U+019C 541 target[limit++] = 0x026F; 542 break; 543 544 case 0x019D: 545 // For case ignore, numeric, and stored prefix string matching rules, 546 // characters are case folded per B.2 of [RFC3454] : U+019D 547 target[limit++] = 0x0272; 548 break; 549 550 case 0x019F: 551 // For case ignore, numeric, and stored prefix string matching rules, 552 // characters are case folded per B.2 of [RFC3454] : U+019F 553 target[limit++] = 0x0275; 554 break; 555 556 case 0x01A0: 557 case 
0x01A2: 558 case 0x01A4: 559 // For case ignore, numeric, and stored prefix string matching rules, 560 // characters are case folded per B.2 of [RFC3454] : U+01A0-U+01A4 561 target[limit++] = ( char ) ( c + 0x0001 ); 562 break; 563 564 case 0x01A6: 565 // For case ignore, numeric, and stored prefix string matching rules, 566 // characters are case folded per B.2 of [RFC3454] : U+01A6 567 target[limit++] = 0x0280; 568 break; 569 570 case 0x01A7: 571 // For case ignore, numeric, and stored prefix string matching rules, 572 // characters are case folded per B.2 of [RFC3454] : U+01A7 573 target[limit++] = 0x01A8; 574 break; 575 576 case 0x01A9: 577 // For case ignore, numeric, and stored prefix string matching rules, 578 // characters are case folded per B.2 of [RFC3454] : U+01A9 579 target[limit++] = 0x0283; 580 break; 581 582 case 0x01AC: 583 // For case ignore, numeric, and stored prefix string matching rules, 584 // characters are case folded per B.2 of [RFC3454] : U+01AC 585 target[limit++] = 0x01AD; 586 break; 587 588 case 0x01AE: 589 // For case ignore, numeric, and stored prefix string matching rules, 590 // characters are case folded per B.2 of [RFC3454] : U+01AE 591 target[limit++] = 0x0288; 592 break; 593 594 case 0x01AF: 595 // For case ignore, numeric, and stored prefix string matching rules, 596 // characters are case folded per B.2 of [RFC3454] : U+01AF 597 target[limit++] = 0x01B0; 598 break; 599 600 case 0x01B1: 601 case 0x01B2: 602 // For case ignore, numeric, and stored prefix string matching rules, 603 // characters are case folded per B.2 of [RFC3454] : U+01AF, U+01B2 604 target[limit++] = ( char ) ( c + 0x00D9 ); 605 break; 606 607 case 0x01B3: 608 case 0x01B5: 609 // For case ignore, numeric, and stored prefix string matching rules, 610 // characters are case folded per B.2 of [RFC3454] : U+01B3, U+01B5 611 target[limit++] = ( char ) ( c + 0x0001 ); 612 break; 613 614 case 0x01B7: 615 // For case ignore, numeric, and stored prefix string matching 
rules, 616 // characters are case folded per B.2 of [RFC3454] : U+01B7 617 target[limit++] = 0x0292; 618 break; 619 620 case 0x01B8: 621 case 0x01BC: 622 // For case ignore, numeric, and stored prefix string matching rules, 623 // characters are case folded per B.2 of [RFC3454] : U+01B8, U+01BC 624 target[limit++] = ( char ) ( c + 0x0001 ); 625 break; 626 627 case 0x01C4: 628 // For case ignore, numeric, and stored prefix string matching rules, 629 // characters are case folded per B.2 of [RFC3454] : U+01C4,U+01C5 630 target[limit++] = 0x01C6; 631 break; 632 633 case 0x01C7: 634 // For case ignore, numeric, and stored prefix string matching rules, 635 // characters are case folded per B.2 of [RFC3454] : U+01C7,U+01C8 636 target[limit++] = 0x01C9; 637 break; 638 639 case 0x01CA: 640 case 0x01CB: 641 // For case ignore, numeric, and stored prefix string matching rules, 642 // characters are case folded per B.2 of [RFC3454] : U+01CA,U+01CB 643 target[limit++] = 0x01CC; 644 break; 645 646 case 0x01CD: 647 case 0x01CF: 648 case 0x01D1: 649 case 0x01D3: 650 case 0x01D5: 651 case 0x01D7: 652 case 0x01D9: 653 case 0x01DB: 654 case 0x01DE: 655 case 0x01E0: 656 case 0x01E2: 657 case 0x01E4: 658 case 0x01E6: 659 case 0x01E8: 660 case 0x01EA: 661 case 0x01EC: 662 case 0x01EE: 663 // For case ignore, numeric, and stored prefix string matching rules, 664 // characters are case folded per B.2 of [RFC3454] : U+01CD, U+01EE 665 target[limit++] = ( char ) ( c + 0x0001 ); 666 break; 667 668 case 0x01F0: 669 // For case ignore, numeric, and stored prefix string matching rules, 670 // characters are case folded per B.2 of [RFC3454] : U+01F0 671 target[limit++] = 0x006A; 672 target[limit++] = 0x030C; 673 break; 674 675 case 0x01F1: 676 case 0x01F2: 677 // For case ignore, numeric, and stored prefix string matching rules, 678 // characters are case folded per B.2 of [RFC3454] : U+01F1, U+01F2 679 target[limit++] = 0x01F3; 680 break; 681 682 case 0x01F4: 683 // For case ignore, numeric, 
and stored prefix string matching rules, 684 // characters are case folded per B.2 of [RFC3454] : U+01F4 685 target[limit++] = 0x01F5; 686 break; 687 688 case 0x01F6: 689 // For case ignore, numeric, and stored prefix string matching rules, 690 // characters are case folded per B.2 of [RFC3454] : U+01F6 691 target[limit++] = 0x0195; 692 break; 693 694 case 0x01F7: 695 // For case ignore, numeric, and stored prefix string matching rules, 696 // characters are case folded per B.2 of [RFC3454] : U+01F7 697 target[limit++] = 0x01BF; 698 break; 699 700 case 0x01F8: 701 case 0x01FA: 702 case 0x01FC: 703 case 0x01FE: 704 case 0x0200: 705 case 0x0202: 706 case 0x0204: 707 case 0x0206: 708 case 0x0208: 709 case 0x020A: 710 case 0x020C: 711 case 0x020E: 712 case 0x0210: 713 case 0x0212: 714 case 0x0214: 715 case 0x0216: 716 case 0x0218: 717 case 0x021A: 718 case 0x021C: 719 case 0x021E: 720 // For case ignore, numeric, and stored prefix string matching rules, 721 // characters are case folded per B.2 of [RFC3454] : U+01F8-U+021E 722 target[limit++] = ( char ) ( c + 0x0001 ); 723 break; 724 725 726 case 0x0220: 727 // For case ignore, numeric, and stored prefix string matching rules, 728 // characters are case folded per B.2 of [RFC3454] : U+0220 729 target[limit++] = 0x019E; 730 break; 731 732 case 0x0222: 733 case 0x0224: 734 case 0x0226: 735 case 0x0228: 736 case 0x022A: 737 case 0x022C: 738 case 0x022E: 739 case 0x0230: 740 case 0x0232: 741 // For case ignore, numeric, and stored prefix string matching rules, 742 // characters are case folded per B.2 of [RFC3454] : U+0222-U+0232 743 target[limit++] = ( char ) ( c + 0x0001 ); 744 break; 745 746 case 0x0345: 747 // For case ignore, numeric, and stored prefix string matching rules, 748 // characters are case folded per B.2 of [RFC3454] : U+0220 749 target[limit++] = 0x03B9; 750 break; 751 752 case 0x034F: 753 // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 754 // points are mapped to nothing. 
COMBINING GRAPHEME JOINER (U+034F) and 755 // VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also 756 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 757 // mapped to nothing. 758 break; 759 760 case 0x037A: 761 // For case ignore, numeric, and stored prefix string matching rules, 762 // characters are case folded per B.2 of [RFC3454] : U+037A 763 target[limit++] = 0x0020; 764 target[limit++] = 0x03B9; 765 break; 766 767 case 0x0386: 768 // For case ignore, numeric, and stored prefix string matching rules, 769 // characters are case folded per B.2 of [RFC3454] : U+0386 770 target[limit++] = 0x03AC; 771 break; 772 773 case 0x0388: 774 case 0x0389: 775 case 0x038A: 776 // For case ignore, numeric, and stored prefix string matching rules, 777 // characters are case folded per B.2 of [RFC3454] : U+0388, U+0389, U+038A 778 target[limit++] = ( char ) ( c + 0x0025 ); 779 break; 780 781 case 0x038C: 782 // For case ignore, numeric, and stored prefix string matching rules, 783 // characters are case folded per B.2 of [RFC3454] : U+038C 784 target[limit++] = 0x03CC; 785 break; 786 787 case 0x038E: 788 case 0x038F: 789 // For case ignore, numeric, and stored prefix string matching rules, 790 // characters are case folded per B.2 of [RFC3454] : U+038E, U+038F 791 target[limit++] = ( char ) ( c + 0x0025 ); 792 break; 793 794 case 0x0390: 795 // For case ignore, numeric, and stored prefix string matching rules, 796 // characters are case folded per B.2 of [RFC3454] : U+0390 797 target[limit++] = 0x03B9; 798 target[limit++] = 0x0308; 799 target[limit++] = 0x0301; 800 break; 801 802 case 0x0391: 803 case 0x0392: 804 case 0x0393: 805 case 0x0394: 806 case 0x0395: 807 case 0x0396: 808 case 0x0397: 809 case 0x0398: 810 case 0x0399: 811 case 0x039A: 812 case 0x039B: 813 case 0x039C: 814 case 0x039D: 815 case 0x039E: 816 case 0x039F: 817 case 0x03A0: 818 case 0x03A1: 819 case 0x03A3: 820 case 0x03A4: 821 case 0x03A5: 822 case 0x03A6: 823 case 0x03A7: 
824 case 0x03A8: 825 case 0x03A9: 826 case 0x03AA: 827 case 0x03AB: 828 // For case ignore, numeric, and stored prefix string matching rules, 829 // characters are case folded per B.2 of [RFC3454] : U+0391-U+03AB 830 target[limit++] = ( char ) ( c + 0x0020 ); 831 break; 832 833 834 case 0x03B0: 835 // For case ignore, numeric, and stored prefix string matching rules, 836 // characters are case folded per B.2 of [RFC3454] : U+03B0 837 target[limit++] = 0x03C5; 838 target[limit++] = 0x0308; 839 target[limit++] = 0x0301; 840 break; 841 842 case 0x03C2: 843 // For case ignore, numeric, and stored prefix string matching rules, 844 // characters are case folded per B.2 of [RFC3454] : U+03C2 845 target[limit++] = 0x03C3; 846 break; 847 848 case 0x03D0: 849 // For case ignore, numeric, and stored prefix string matching rules, 850 // characters are case folded per B.2 of [RFC3454] : U+03D0 851 target[limit++] = 0x03B2; 852 break; 853 854 case 0x03D1: 855 // For case ignore, numeric, and stored prefix string matching rules, 856 // characters are case folded per B.2 of [RFC3454] : U+03D1 857 target[limit++] = 0x03B8; 858 break; 859 860 case 0x03D2: 861 // For case ignore, numeric, and stored prefix string matching rules, 862 // characters are case folded per B.2 of [RFC3454] : U+03D2 863 target[limit++] = 0x03C5; 864 break; 865 866 case 0x03D3: 867 // For case ignore, numeric, and stored prefix string matching rules, 868 // characters are case folded per B.2 of [RFC3454] : U+03D3 869 target[limit++] = 0x03CD; 870 break; 871 872 case 0x03D4: 873 // For case ignore, numeric, and stored prefix string matching rules, 874 // characters are case folded per B.2 of [RFC3454] : U+03D4 875 target[limit++] = 0x03CB; 876 break; 877 878 case 0x03D5: 879 // For case ignore, numeric, and stored prefix string matching rules, 880 // characters are case folded per B.2 of [RFC3454] : U+03D5 881 target[limit++] = 0x03C6; 882 break; 883 884 case 0x03D6: 885 // For case ignore, numeric, and stored 
prefix string matching rules, 886 // characters are case folded per B.2 of [RFC3454] : U+03D6 887 target[limit++] = 0x03C0; 888 break; 889 890 case 0x03D8: 891 case 0x03DA: 892 case 0x03DC: 893 case 0x03DE: 894 case 0x03E0: 895 case 0x03E2: 896 case 0x03E4: 897 case 0x03E6: 898 case 0x03E8: 899 case 0x03EA: 900 case 0x03EC: 901 case 0x03EE: 902 // For case ignore, numeric, and stored prefix string matching rules, 903 // characters are case folded per B.2 of [RFC3454] : U+03D8-U+03EE 904 target[limit++] = ( char ) ( c + 0x0001 ); 905 break; 906 907 case 0x03F0: 908 // For case ignore, numeric, and stored prefix string matching rules, 909 // characters are case folded per B.2 of [RFC3454] : U+03F0 910 target[limit++] = 0x03BA; 911 break; 912 913 case 0x03F1: 914 // For case ignore, numeric, and stored prefix string matching rules, 915 // characters are case folded per B.2 of [RFC3454] : U+03F1 916 target[limit++] = 0x03C1; 917 break; 918 919 case 0x03F2: 920 // For case ignore, numeric, and stored prefix string matching rules, 921 // characters are case folded per B.2 of [RFC3454] : U+03F2 922 target[limit++] = 0x03C3; 923 break; 924 925 case 0x03F4: 926 // For case ignore, numeric, and stored prefix string matching rules, 927 // characters are case folded per B.2 of [RFC3454] : U+03F4 928 target[limit++] = 0x03B8; 929 break; 930 931 case 0x03F5: 932 // For case ignore, numeric, and stored prefix string matching rules, 933 // characters are case folded per B.2 of [RFC3454] : U+03F5 934 target[limit++] = 0x03B5; 935 break; 936 937 case 0x0400: 938 case 0x0401: 939 case 0x0402: 940 case 0x0403: 941 case 0x0404: 942 case 0x0405: 943 case 0x0406: 944 case 0x0407: 945 case 0x0408: 946 case 0x0409: 947 case 0x040A: 948 case 0x040B: 949 case 0x040C: 950 case 0x040D: 951 case 0x040E: 952 case 0x040F: 953 // For case ignore, numeric, and stored prefix string matching rules, 954 // characters are case folded per B.2 of [RFC3454] : U+0400-U+040F 955 target[limit++] = ( char ) ( 
c + 0x0050 ); 956 break; 957 958 case 0x0410: 959 case 0x0411: 960 case 0x0412: 961 case 0x0413: 962 case 0x0414: 963 case 0x0415: 964 case 0x0416: 965 case 0x0417: 966 case 0x0418: 967 case 0x0419: 968 case 0x041A: 969 case 0x041B: 970 case 0x041C: 971 case 0x041D: 972 case 0x041E: 973 case 0x041F: 974 case 0x0420: 975 case 0x0421: 976 case 0x0422: 977 case 0x0423: 978 case 0x0424: 979 case 0x0425: 980 case 0x0426: 981 case 0x0427: 982 case 0x0428: 983 case 0x0429: 984 case 0x042A: 985 case 0x042B: 986 case 0x042C: 987 case 0x042D: 988 case 0x042E: 989 case 0x042F: 990 // For case ignore, numeric, and stored prefix string matching rules, 991 // characters are case folded per B.2 of [RFC3454] : U+0410-U+042F 992 target[limit++] = ( char ) ( c + 0x0020 ); 993 break; 994 995 case 0x0460: 996 case 0x0462: 997 case 0x0464: 998 case 0x0466: 999 case 0x0468: 1000 case 0x046A: 1001 case 0x046C: 1002 case 0x046E: 1003 case 0x0470: 1004 case 0x0472: 1005 case 0x0474: 1006 case 0x0476: 1007 case 0x0478: 1008 case 0x047A: 1009 case 0x047C: 1010 case 0x047E: 1011 case 0x0480: 1012 case 0x048A: 1013 case 0x048C: 1014 case 0x048E: 1015 case 0x0490: 1016 case 0x0492: 1017 case 0x0494: 1018 case 0x0496: 1019 case 0x0498: 1020 case 0x049A: 1021 case 0x049C: 1022 case 0x049E: 1023 case 0x04A0: 1024 case 0x04A2: 1025 case 0x04A4: 1026 case 0x04A6: 1027 case 0x04A8: 1028 case 0x04AA: 1029 case 0x04AC: 1030 case 0x04AE: 1031 case 0x04B0: 1032 case 0x04B2: 1033 case 0x04B4: 1034 case 0x04B6: 1035 case 0x04B8: 1036 case 0x04BA: 1037 case 0x04BC: 1038 case 0x04BE: 1039 case 0x04C1: 1040 case 0x04C3: 1041 case 0x04C5: 1042 case 0x04C7: 1043 case 0x04C9: 1044 case 0x04CB: 1045 case 0x04CD: 1046 case 0x04D0: 1047 case 0x04D2: 1048 case 0x04D4: 1049 case 0x04D6: 1050 case 0x04D8: 1051 case 0x04DA: 1052 case 0x04DC: 1053 case 0x04DE: 1054 case 0x04E0: 1055 case 0x04E2: 1056 case 0x04E4: 1057 case 0x04E6: 1058 case 0x04E8: 1059 case 0x04EA: 1060 case 0x04EC: 1061 case 0x04EE: 1062 case 0x04F0: 
1063 case 0x04F2: 1064 case 0x04F4: 1065 case 0x04F8: 1066 case 0x0500: 1067 case 0x0502: 1068 case 0x0504: 1069 case 0x0506: 1070 case 0x0508: 1071 case 0x050A: 1072 case 0x050C: 1073 case 0x050E: 1074 // For case ignore, numeric, and stored prefix string matching rules, 1075 // characters are case folded per B.2 of [RFC3454] : U+0460-U+050E 1076 target[limit++] = ( char ) ( c + 0x0001 ); 1077 break; 1078 1079 case 0x0531: 1080 case 0x0532: 1081 case 0x0533: 1082 case 0x0534: 1083 case 0x0535: 1084 case 0x0536: 1085 case 0x0537: 1086 case 0x0538: 1087 case 0x0539: 1088 case 0x053A: 1089 case 0x053B: 1090 case 0x053C: 1091 case 0x053D: 1092 case 0x053E: 1093 case 0x053F: 1094 case 0x0540: 1095 case 0x0541: 1096 case 0x0542: 1097 case 0x0543: 1098 case 0x0544: 1099 case 0x0545: 1100 case 0x0546: 1101 case 0x0547: 1102 case 0x0548: 1103 case 0x0549: 1104 case 0x054A: 1105 case 0x054B: 1106 case 0x054C: 1107 case 0x054D: 1108 case 0x054E: 1109 case 0x054F: 1110 case 0x0550: 1111 case 0x0551: 1112 case 0x0552: 1113 case 0x0553: 1114 case 0x0554: 1115 case 0x0555: 1116 case 0x0556: 1117 // For case ignore, numeric, and stored prefix string matching rules, 1118 // characters are case folded per B.2 of [RFC3454] : U+0531-U+0556 1119 target[limit++] = ( char ) ( c + 0x0030 ); 1120 break; 1121 1122 1123 case 0x0587: 1124 // For case ignore, numeric, and stored prefix string matching rules, 1125 // characters are case folded per B.2 of [RFC3454] : U+0587 1126 target[limit++] = 0x0565; 1127 target[limit++] = 0x0582; 1128 break; 1129 1130 case 0x06DD: 1131 case 0x070F: 1132 // All other control code (e.g., Cc) points or code points with a 1133 // control function (e.g., Cf) are mapped to nothing. The following is 1134 // a complete list of these code points: ... U+06DD-070F... 1135 break; 1136 1137 case 0x1680: 1138 // All other code points with Separator (space, line, or paragraph) property 1139 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). 
The following is a complete 1140 // list of these code points: ...1680... 1141 target[limit++] = 0x0020; 1142 break; 1143 1144 case 0x1806: 1145 // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 1146 // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and 1147 // VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also 1148 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 1149 // mapped to nothing. 1150 break; 1151 1152 case 0x180B: 1153 case 0x180C: 1154 case 0x180D: 1155 // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 1156 // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and 1157 // VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also 1158 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 1159 // mapped to nothing. 1160 break; 1161 1162 case 0x180E: 1163 // All other control code (e.g., Cc) points or code points with a 1164 // control function (e.g., Cf) are mapped to nothing. The following is 1165 // a complete list of these code points: ... U+180E... 
1166 break; 1167 1168 case 0x1E00: 1169 case 0x1E02: 1170 case 0x1E04: 1171 case 0x1E06: 1172 case 0x1E08: 1173 case 0x1E0A: 1174 case 0x1E0C: 1175 case 0x1E0E: 1176 case 0x1E10: 1177 case 0x1E12: 1178 case 0x1E14: 1179 case 0x1E16: 1180 case 0x1E18: 1181 case 0x1E1A: 1182 case 0x1E1C: 1183 case 0x1E1E: 1184 case 0x1E20: 1185 case 0x1E22: 1186 case 0x1E24: 1187 case 0x1E26: 1188 case 0x1E28: 1189 case 0x1E2A: 1190 case 0x1E2C: 1191 case 0x1E2E: 1192 case 0x1E30: 1193 case 0x1E32: 1194 case 0x1E34: 1195 case 0x1E36: 1196 case 0x1E38: 1197 case 0x1E3A: 1198 case 0x1E3C: 1199 case 0x1E3E: 1200 case 0x1E40: 1201 case 0x1E42: 1202 case 0x1E44: 1203 case 0x1E46: 1204 case 0x1E48: 1205 case 0x1E4A: 1206 case 0x1E4C: 1207 case 0x1E4E: 1208 case 0x1E50: 1209 case 0x1E52: 1210 case 0x1E54: 1211 case 0x1E56: 1212 case 0x1E58: 1213 case 0x1E5A: 1214 case 0x1E5C: 1215 case 0x1E5E: 1216 case 0x1E60: 1217 case 0x1E62: 1218 case 0x1E64: 1219 case 0x1E66: 1220 case 0x1E68: 1221 case 0x1E6A: 1222 case 0x1E6C: 1223 case 0x1E6E: 1224 case 0x1E70: 1225 case 0x1E72: 1226 case 0x1E74: 1227 case 0x1E76: 1228 case 0x1E78: 1229 case 0x1E7A: 1230 case 0x1E7C: 1231 case 0x1E7E: 1232 case 0x1E80: 1233 case 0x1E82: 1234 case 0x1E84: 1235 case 0x1E86: 1236 case 0x1E88: 1237 case 0x1E8A: 1238 case 0x1E8C: 1239 case 0x1E8E: 1240 case 0x1E90: 1241 case 0x1E92: 1242 case 0x1E94: 1243 // For case ignore, numeric, and stored prefix string matching rules, 1244 // characters are case folded per B.2 of [RFC3454] : U+1E00-U+1E94 1245 target[limit++] = ( char ) ( c + 0x0001 ); 1246 break; 1247 1248 case 0x1E96: 1249 // For case ignore, numeric, and stored prefix string matching rules, 1250 // characters are case folded per B.2 of [RFC3454] : U+1E96 1251 target[limit++] = 0x0068; 1252 target[limit++] = 0x0331; 1253 break; 1254 1255 case 0x1E97: 1256 // For case ignore, numeric, and stored prefix string matching rules, 1257 // characters are case folded per B.2 of [RFC3454] : U+1E97 1258 target[limit++] = 
0x0074; 1259 target[limit++] = 0x0308; 1260 break; 1261 1262 case 0x1E98: 1263 // For case ignore, numeric, and stored prefix string matching rules, 1264 // characters are case folded per B.2 of [RFC3454] : U+1E98 1265 target[limit++] = 0x0077; 1266 target[limit++] = 0x030A; 1267 break; 1268 1269 case 0x1E99: 1270 // For case ignore, numeric, and stored prefix string matching rules, 1271 // characters are case folded per B.2 of [RFC3454] : U+1E99 1272 target[limit++] = 0x0079; 1273 target[limit++] = 0x030A; 1274 break; 1275 1276 case 0x1E9A: 1277 // For case ignore, numeric, and stored prefix string matching rules, 1278 // characters are case folded per B.2 of [RFC3454] : U+1E9A 1279 target[limit++] = 0x0061; 1280 target[limit++] = 0x02BE; 1281 break; 1282 1283 case 0x1E9B: 1284 // For case ignore, numeric, and stored prefix string matching rules, 1285 // characters are case folded per B.2 of [RFC3454] : U+1E9B 1286 target[limit++] = 0x1E61; 1287 break; 1288 1289 case 0x1EA0: 1290 case 0x1EA2: 1291 case 0x1EA4: 1292 case 0x1EA6: 1293 case 0x1EA8: 1294 case 0x1EAA: 1295 case 0x1EAC: 1296 case 0x1EAE: 1297 case 0x1EB0: 1298 case 0x1EB2: 1299 case 0x1EB4: 1300 case 0x1EB6: 1301 case 0x1EB8: 1302 case 0x1EBA: 1303 case 0x1EBC: 1304 case 0x1EBE: 1305 case 0x1EC0: 1306 case 0x1EC2: 1307 case 0x1EC4: 1308 case 0x1EC6: 1309 case 0x1EC8: 1310 case 0x1ECA: 1311 case 0x1ECC: 1312 case 0x1ECE: 1313 case 0x1ED0: 1314 case 0x1ED2: 1315 case 0x1ED4: 1316 case 0x1ED6: 1317 case 0x1ED8: 1318 case 0x1EDA: 1319 case 0x1EDC: 1320 case 0x1EDE: 1321 case 0x1EE0: 1322 case 0x1EE2: 1323 case 0x1EE4: 1324 case 0x1EE6: 1325 case 0x1EE8: 1326 case 0x1EEA: 1327 case 0x1EEC: 1328 case 0x1EEE: 1329 case 0x1EF0: 1330 case 0x1EF2: 1331 case 0x1EF4: 1332 case 0x1EF6: 1333 case 0x1EF8: 1334 // For case ignore, numeric, and stored prefix string matching rules, 1335 // characters are case folded per B.2 of [RFC3454] : U+1EA0-U+1EF8 1336 target[limit++] = ( char ) ( c + 0x0001 ); 1337 break; 1338 1339 
case 0x1F08: 1340 case 0x1F09: 1341 case 0x1F0A: 1342 case 0x1F0B: 1343 case 0x1F0C: 1344 case 0x1F0D: 1345 case 0x1F0E: 1346 case 0x1F0F: 1347 case 0x1F18: 1348 case 0x1F19: 1349 case 0x1F1A: 1350 case 0x1F1B: 1351 case 0x1F1C: 1352 case 0x1F1D: 1353 case 0x1F28: 1354 case 0x1F29: 1355 case 0x1F2A: 1356 case 0x1F2B: 1357 case 0x1F2C: 1358 case 0x1F2D: 1359 case 0x1F2E: 1360 case 0x1F2F: 1361 case 0x1F38: 1362 case 0x1F39: 1363 case 0x1F3A: 1364 case 0x1F3B: 1365 case 0x1F3C: 1366 case 0x1F3D: 1367 case 0x1F3E: 1368 case 0x1F3F: 1369 case 0x1F48: 1370 case 0x1F49: 1371 case 0x1F4A: 1372 case 0x1F4B: 1373 case 0x1F4C: 1374 case 0x1F4D: 1375 // For case ignore, numeric, and stored prefix string matching rules, 1376 // characters are case folded per B.2 of [RFC3454] : U+1F08-U+1F4D 1377 target[limit++] = ( char ) ( c - 0x0008 ); 1378 break; 1379 1380 case 0x1F50: 1381 // For case ignore, numeric, and stored prefix string matching rules, 1382 // characters are case folded per B.2 of [RFC3454] : U+1F50 1383 target[limit++] = 0x03C5; 1384 target[limit++] = 0x0313; 1385 break; 1386 1387 case 0x1F52: 1388 // For case ignore, numeric, and stored prefix string matching rules, 1389 // characters are case folded per B.2 of [RFC3454] : U+1F52 1390 target[limit++] = 0x03C5; 1391 target[limit++] = 0x0313; 1392 target[limit++] = 0x0300; 1393 break; 1394 1395 case 0x1F54: 1396 // For case ignore, numeric, and stored prefix string matching rules, 1397 // characters are case folded per B.2 of [RFC3454] : U+1F54 1398 target[limit++] = 0x03C5; 1399 target[limit++] = 0x0313; 1400 target[limit++] = 0x0301; 1401 break; 1402 1403 case 0x1F56: 1404 // For case ignore, numeric, and stored prefix string matching rules, 1405 // characters are case folded per B.2 of [RFC3454] : U+1F56 1406 target[limit++] = 0x03C5; 1407 target[limit++] = 0x0313; 1408 target[limit++] = 0x0342; 1409 break; 1410 1411 case 0x1F59: 1412 case 0x1F5B: 1413 case 0x1F5D: 1414 case 0x1F5F: 1415 case 0x1F68: 1416 case 
0x1F69: 1417 case 0x1F6A: 1418 case 0x1F6B: 1419 case 0x1F6C: 1420 case 0x1F6D: 1421 case 0x1F6E: 1422 case 0x1F6F: 1423 // For case ignore, numeric, and stored prefix string matching rules, 1424 // characters are case folded per B.2 of [RFC3454] : U+1F59-U+1F6F 1425 target[limit++] = ( char ) ( c - 0x0008 ); 1426 break; 1427 1428 case 0x1F80: 1429 case 0x1F81: 1430 case 0x1F82: 1431 case 0x1F83: 1432 case 0x1F84: 1433 case 0x1F85: 1434 case 0x1F86: 1435 case 0x1F87: 1436 // For case ignore, numeric, and stored prefix string matching rules, 1437 // characters are case folded per B.2 of [RFC3454] : U+1F80-U+1F87 1438 target[limit++] = ( char ) ( c - 0x0080 ); 1439 target[limit++] = 0x03B9; 1440 break; 1441 1442 case 0x1F88: 1443 case 0x1F89: 1444 case 0x1F8A: 1445 case 0x1F8B: 1446 case 0x1F8C: 1447 case 0x1F8D: 1448 case 0x1F8E: 1449 case 0x1F8F: 1450 // For case ignore, numeric, and stored prefix string matching rules, 1451 // characters are case folded per B.2 of [RFC3454] : U+1F88-U+1F8F 1452 target[limit++] = ( char ) ( c - 0x0088 ); 1453 target[limit++] = 0x03B9; 1454 break; 1455 1456 case 0x1F90: 1457 case 0x1F91: 1458 case 0x1F92: 1459 case 0x1F93: 1460 case 0x1F94: 1461 case 0x1F95: 1462 case 0x1F96: 1463 case 0x1F97: 1464 // For case ignore, numeric, and stored prefix string matching rules, 1465 // characters are case folded per B.2 of [RFC3454] : U+1F90-U+1F97 1466 target[limit++] = ( char ) ( c - 0x0070 ); 1467 target[limit++] = 0x03B9; 1468 break; 1469 1470 case 0x1F98: 1471 case 0x1F99: 1472 case 0x1F9A: 1473 case 0x1F9B: 1474 case 0x1F9C: 1475 case 0x1F9D: 1476 case 0x1F9E: 1477 case 0x1F9F: 1478 // For case ignore, numeric, and stored prefix string matching rules, 1479 // characters are case folded per B.2 of [RFC3454] : U+1F98-U+1F9F 1480 target[limit++] = ( char ) ( c - 0x0078 ); 1481 target[limit++] = 0x03B9; 1482 break; 1483 1484 case 0x1FA0: 1485 case 0x1FA1: 1486 case 0x1FA2: 1487 case 0x1FA3: 1488 case 0x1FA4: 1489 case 0x1FA5: 1490 case 
0x1FA6: 1491 case 0x1FA7: 1492 // For case ignore, numeric, and stored prefix string matching rules, 1493 // characters are case folded per B.2 of [RFC3454] : U+1FA0-U+1FA7 1494 target[limit++] = ( char ) ( c - 0x0040 ); 1495 target[limit++] = 0x03B9; 1496 break; 1497 1498 case 0x1FA8: 1499 case 0x1FA9: 1500 case 0x1FAA: 1501 case 0x1FAB: 1502 case 0x1FAC: 1503 case 0x1FAD: 1504 case 0x1FAE: 1505 case 0x1FAF: 1506 // For case ignore, numeric, and stored prefix string matching rules, 1507 // characters are case folded per B.2 of [RFC3454] : U+1FA8-U+1FAF 1508 target[limit++] = ( char ) ( c - 0x0048 ); 1509 target[limit++] = 0x03B9; 1510 break; 1511 1512 case 0x1FB2: 1513 // For case ignore, numeric, and stored prefix string matching rules, 1514 // characters are case folded per B.2 of [RFC3454] : U+1FB2 1515 target[limit++] = 0x1F70; 1516 target[limit++] = 0x03B9; 1517 break; 1518 1519 case 0x1FB3: 1520 // For case ignore, numeric, and stored prefix string matching rules, 1521 // characters are case folded per B.2 of [RFC3454] : U+1FB3 1522 target[limit++] = 0x03B1; 1523 target[limit++] = 0x03B9; 1524 break; 1525 1526 case 0x1FB4: 1527 // For case ignore, numeric, and stored prefix string matching rules, 1528 // characters are case folded per B.2 of [RFC3454] : U+1FB4 1529 target[limit++] = 0x03AC; 1530 target[limit++] = 0x03B9; 1531 break; 1532 1533 case 0x1FB6: 1534 // For case ignore, numeric, and stored prefix string matching rules, 1535 // characters are case folded per B.2 of [RFC3454] : U+1FB6 1536 target[limit++] = 0x03B1; 1537 target[limit++] = 0x0342; 1538 break; 1539 1540 case 0x1FB7: 1541 // For case ignore, numeric, and stored prefix string matching rules, 1542 // characters are case folded per B.2 of [RFC3454] : U+1FB7 1543 target[limit++] = 0x03B1; 1544 target[limit++] = 0x0342; 1545 target[limit++] = 0x03B9; 1546 break; 1547 1548 case 0x1FB8: 1549 case 0x1FB9: 1550 // For case ignore, numeric, and stored prefix string matching rules, 1551 // 
characters are case folded per B.2 of [RFC3454] : U+1FB8,U+1FB9 1552 target[limit++] = ( char ) ( c - 0x0008 ); 1553 break; 1554 1555 case 0x1FBA: 1556 case 0x1FBB: 1557 // For case ignore, numeric, and stored prefix string matching rules, 1558 // characters are case folded per B.2 of [RFC3454] : U+1FBA,U+1FBB 1559 target[limit++] = ( char ) ( c - 0x004A ); 1560 target[limit++] = 0x1F70; 1561 break; 1562 1563 case 0x1FBC: 1564 // For case ignore, numeric, and stored prefix string matching rules, 1565 // characters are case folded per B.2 of [RFC3454] : U+1FBC 1566 target[limit++] = 0x03B1; 1567 target[limit++] = 0x03B9; 1568 break; 1569 1570 case 0x1FBE: 1571 // For case ignore, numeric, and stored prefix string matching rules, 1572 // characters are case folded per B.2 of [RFC3454] : U+1FBE 1573 target[limit++] = 0x03B9; 1574 break; 1575 1576 case 0x1FC2: 1577 // For case ignore, numeric, and stored prefix string matching rules, 1578 // characters are case folded per B.2 of [RFC3454] : U+1FC2 1579 target[limit++] = 0x1F74; 1580 target[limit++] = 0x03B9; 1581 break; 1582 1583 case 0x1FC3: 1584 // For case ignore, numeric, and stored prefix string matching rules, 1585 // characters are case folded per B.2 of [RFC3454] : U+1FC3 1586 target[limit++] = 0x03B7; 1587 target[limit++] = 0x03B9; 1588 break; 1589 1590 case 0x1FC4: 1591 // For case ignore, numeric, and stored prefix string matching rules, 1592 // characters are case folded per B.2 of [RFC3454] : U+1FC4 1593 target[limit++] = 0x03AE; 1594 target[limit++] = 0x03B9; 1595 break; 1596 1597 case 0x1FC6: 1598 // For case ignore, numeric, and stored prefix string matching rules, 1599 // characters are case folded per B.2 of [RFC3454] : U+1FC6 1600 target[limit++] = 0x03B7; 1601 target[limit++] = 0x0342; 1602 break; 1603 1604 case 0x1FC7: 1605 // For case ignore, numeric, and stored prefix string matching rules, 1606 // characters are case folded per B.2 of [RFC3454] : U+1FC7 1607 target[limit++] = 0x03B7; 1608 
target[limit++] = 0x0342; 1609 target[limit++] = 0x03B9; 1610 break; 1611 1612 case 0x1FC8: 1613 case 0x1FC9: 1614 case 0x1FCA: 1615 case 0x1FCB: 1616 // For case ignore, numeric, and stored prefix string matching rules, 1617 // characters are case folded per B.2 of [RFC3454] : U+1FC8-U+01FCB 1618 target[limit++] = ( char ) ( c - 0x0056 ); 1619 target[limit++] = 0x1F72; 1620 break; 1621 1622 case 0x1FCC: 1623 // For case ignore, numeric, and stored prefix string matching rules, 1624 // characters are case folded per B.2 of [RFC3454] : U+1FCC 1625 target[limit++] = 0x03B7; 1626 target[limit++] = 0x03B9; 1627 break; 1628 1629 case 0x1FD2: 1630 // For case ignore, numeric, and stored prefix string matching rules, 1631 // characters are case folded per B.2 of [RFC3454] : U+1FD2 1632 target[limit++] = 0x03B9; 1633 target[limit++] = 0x0308; 1634 target[limit++] = 0x0300; 1635 break; 1636 1637 case 0x1FD3: 1638 // For case ignore, numeric, and stored prefix string matching rules, 1639 // characters are case folded per B.2 of [RFC3454] : U+1FD3 1640 target[limit++] = 0x03B9; 1641 target[limit++] = 0x0308; 1642 target[limit++] = 0x0301; 1643 break; 1644 1645 case 0x1FD6: 1646 // For case ignore, numeric, and stored prefix string matching rules, 1647 // characters are case folded per B.2 of [RFC3454] : U+1FD6 1648 target[limit++] = 0x03B9; 1649 target[limit++] = 0x0342; 1650 break; 1651 1652 case 0x1FD7: 1653 // For case ignore, numeric, and stored prefix string matching rules, 1654 // characters are case folded per B.2 of [RFC3454] : U+1FD7 1655 target[limit++] = 0x03B9; 1656 target[limit++] = 0x0308; 1657 target[limit++] = 0x0342; 1658 break; 1659 1660 case 0x1FD8: 1661 case 0x1FD9: 1662 // For case ignore, numeric, and stored prefix string matching rules, 1663 // characters are case folded per B.2 of [RFC3454] : U+1FD8-U+01FD9 1664 target[limit++] = ( char ) ( c - 0x0008 ); 1665 break; 1666 1667 case 0x1FDA: 1668 case 0x1FDB: 1669 // For case ignore, numeric, and stored 
prefix string matching rules, 1670 // characters are case folded per B.2 of [RFC3454] : U+1FD8-U+01FD9 1671 target[limit++] = ( char ) ( c - 0x0064 ); 1672 break; 1673 1674 case 0x1FE2: 1675 // For case ignore, numeric, and stored prefix string matching rules, 1676 // characters are case folded per B.2 of [RFC3454] : U+1FE2 1677 target[limit++] = 0x03C5; 1678 target[limit++] = 0x0308; 1679 target[limit++] = 0x0300; 1680 break; 1681 1682 case 0x1FE3: 1683 // For case ignore, numeric, and stored prefix string matching rules, 1684 // characters are case folded per B.2 of [RFC3454] : U+1FE3 1685 target[limit++] = 0x03C5; 1686 target[limit++] = 0x0308; 1687 target[limit++] = 0x0301; 1688 break; 1689 1690 case 0x1FE4: 1691 // For case ignore, numeric, and stored prefix string matching rules, 1692 // characters are case folded per B.2 of [RFC3454] : U+1FE4 1693 target[limit++] = 0x03C1; 1694 target[limit++] = 0x0313; 1695 break; 1696 1697 case 0x1FE6: 1698 // For case ignore, numeric, and stored prefix string matching rules, 1699 // characters are case folded per B.2 of [RFC3454] : U+1FE6 1700 target[limit++] = 0x03C5; 1701 target[limit++] = 0x0342; 1702 break; 1703 1704 case 0x1FE7: 1705 // For case ignore, numeric, and stored prefix string matching rules, 1706 // characters are case folded per B.2 of [RFC3454] : U+1FE7 1707 target[limit++] = 0x03C5; 1708 target[limit++] = 0x0308; 1709 target[limit++] = 0x0342; 1710 break; 1711 1712 case 0x1FE8: 1713 case 0x1FE9: 1714 // For case ignore, numeric, and stored prefix string matching rules, 1715 // characters are case folded per B.2 of [RFC3454] : U+1FE8-U+01FE9 1716 target[limit++] = ( char ) ( c - 0x0008 ); 1717 break; 1718 1719 case 0x1FEA: 1720 case 0x1FEB: 1721 // For case ignore, numeric, and stored prefix string matching rules, 1722 // characters are case folded per B.2 of [RFC3454] : U+1FEA-U+01FEB 1723 target[limit++] = ( char ) ( c - 0x0070 ); 1724 break; 1725 1726 case 0x1FEC: 1727 // For case ignore, numeric, and 
stored prefix string matching rules, 1728 // characters are case folded per B.2 of [RFC3454] : U+1FEC 1729 target[limit++] = 0x1FE5; 1730 break; 1731 1732 case 0x1FF2: 1733 // For case ignore, numeric, and stored prefix string matching rules, 1734 // characters are case folded per B.2 of [RFC3454] : U+1FF2 1735 target[limit++] = 0x1F7C; 1736 target[limit++] = 0x03B9; 1737 break; 1738 1739 case 0x1FF3: 1740 // For case ignore, numeric, and stored prefix string matching rules, 1741 // characters are case folded per B.2 of [RFC3454] : U+1FF3 1742 target[limit++] = 0x03C9; 1743 target[limit++] = 0x03B9; 1744 break; 1745 1746 case 0x1FF4: 1747 // For case ignore, numeric, and stored prefix string matching rules, 1748 // characters are case folded per B.2 of [RFC3454] : U+1FF4 1749 target[limit++] = 0x03CE; 1750 target[limit++] = 0x03B9; 1751 break; 1752 1753 case 0x1FF6: 1754 // For case ignore, numeric, and stored prefix string matching rules, 1755 // characters are case folded per B.2 of [RFC3454] : U+1FF6 1756 target[limit++] = 0x03C9; 1757 target[limit++] = 0x0342; 1758 break; 1759 1760 case 0x1FF7: 1761 // For case ignore, numeric, and stored prefix string matching rules, 1762 // characters are case folded per B.2 of [RFC3454] : U+1FF7 1763 target[limit++] = 0x03C9; 1764 target[limit++] = 0x0342; 1765 target[limit++] = 0x03B9; 1766 break; 1767 1768 case 0x1FF8: 1769 case 0x1FF9: 1770 // For case ignore, numeric, and stored prefix string matching rules, 1771 // characters are case folded per B.2 of [RFC3454] : U+1FF8-U+01FF9 1772 target[limit++] = ( char ) ( c - 0x0080 ); 1773 break; 1774 1775 case 0x1FFA: 1776 case 0x1FFB: 1777 // For case ignore, numeric, and stored prefix string matching rules, 1778 // characters are case folded per B.2 of [RFC3454] : U+1FFA-U+01FFB 1779 target[limit++] = ( char ) ( c - 0x007E ); 1780 target[limit++] = 0x1F7C; 1781 break; 1782 1783 case 0x1FFC: 1784 // For case ignore, numeric, and stored prefix string matching rules, 1785 // 
characters are case folded per B.2 of [RFC3454] : U+1FFC 1786 target[limit++] = 0x03C9; 1787 target[limit++] = 0x03B9; 1788 break; 1789 1790 case 0x2000: 1791 case 0x2001: 1792 case 0x2002: 1793 case 0x2003: 1794 case 0x2004: 1795 case 0x2005: 1796 case 0x2006: 1797 case 0x2007: 1798 case 0x2008: 1799 case 0x2009: 1800 case 0x200A: 1801 // All other code points with Separator (space, line, or paragraph) property 1802 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 1803 // list of these code points: ...2000-200A... 1804 target[limit++] = 0x0020; 1805 break; 1806 1807 case 0x200B: 1808 // ZERO WIDTH SPACE (U+200B) is mapped to nothing. 1809 break; 1810 1811 case 0x200C: 1812 case 0x200D: 1813 case 0x200E: 1814 case 0x200F: 1815 // All other control code (e.g., Cc) points or code points with a 1816 // control function (e.g., Cf) are mapped to nothing. The following is 1817 // a complete list of these code points: ... U+200C-200FF... 1818 break; 1819 1820 case 0x2028: 1821 case 0x2029: 1822 // All other code points with Separator (space, line, or paragraph) property 1823 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 1824 // list of these code points: ... 2028-2029... 1825 target[limit++] = 0x0020; 1826 break; 1827 1828 case 0x202A: 1829 case 0x202B: 1830 case 0x202C: 1831 case 0x202D: 1832 case 0x202E: 1833 // All other control code (e.g., Cc) points or code points with a 1834 // control function (e.g., Cf) are mapped to nothing. The following is 1835 // a complete list of these code points: ... U+202A-202E... 1836 break; 1837 1838 case 0x202F: 1839 // All other code points with Separator (space, line, or paragraph) property 1840 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 1841 // list of these code points: ... 202F ... 
1842 target[limit++] = 0x0020; 1843 break; 1844 1845 case 0x205F: 1846 // All other code points with Separator (space, line, or paragraph) property 1847 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 1848 // list of these code points:...205F... 1849 target[limit++] = 0x0020; 1850 break; 1851 1852 case 0x2060: 1853 case 0x2061: 1854 case 0x2062: 1855 case 0x2063: 1856 // All other control code (e.g., Cc) points or code points with a 1857 // control function (e.g., Cf) are mapped to nothing. The following is 1858 // a complete list of these code points: ... U+2060-2063... 1859 break; 1860 1861 case 0x206A: 1862 case 0x206B: 1863 case 0x206C: 1864 case 0x206D: 1865 case 0x206E: 1866 case 0x206F: 1867 // All other control code (e.g., Cc) points or code points with a 1868 // control function (e.g., Cf) are mapped to nothing. The following is 1869 // a complete list of these code points: ... U+20GA-20GFF... 1870 break; 1871 1872 case 0x20A8: 1873 // For case ignore, numeric, and stored prefix string matching rules, 1874 // characters are case folded per B.2 of [RFC3454] : U+20A8 1875 target[limit++] = 0x0072; 1876 target[limit++] = 0x0073; 1877 break; 1878 1879 case 0x2102: 1880 // For case ignore, numeric, and stored prefix string matching rules, 1881 // characters are case folded per B.2 of [RFC3454] : U+2102 1882 target[limit++] = 0x0063; 1883 break; 1884 1885 case 0x2103: 1886 // For case ignore, numeric, and stored prefix string matching rules, 1887 // characters are case folded per B.2 of [RFC3454] : U+2103 1888 target[limit++] = 0x00B0; 1889 target[limit++] = 0x0063; 1890 break; 1891 1892 case 0x2107: 1893 // For case ignore, numeric, and stored prefix string matching rules, 1894 // characters are case folded per B.2 of [RFC3454] : U+2107 1895 target[limit++] = 0x025B; 1896 break; 1897 1898 case 0x2109: 1899 // For case ignore, numeric, and stored prefix string matching rules, 1900 // characters are case folded per B.2 of 
[RFC3454] : U+2109 1901 target[limit++] = 0x00B0; 1902 target[limit++] = 0x0066; 1903 break; 1904 1905 case 0x210B: 1906 // For case ignore, numeric, and stored prefix string matching rules, 1907 // characters are case folded per B.2 of [RFC3454] : U+210B 1908 target[limit++] = 0x0068; 1909 break; 1910 1911 case 0x210C: 1912 // For case ignore, numeric, and stored prefix string matching rules, 1913 // characters are case folded per B.2 of [RFC3454] : U+210C 1914 target[limit++] = 0x0068; 1915 break; 1916 1917 case 0x210D: 1918 // For case ignore, numeric, and stored prefix string matching rules, 1919 // characters are case folded per B.2 of [RFC3454] : U+210D 1920 target[limit++] = 0x0068; 1921 break; 1922 1923 case 0x2110: 1924 // For case ignore, numeric, and stored prefix string matching rules, 1925 // characters are case folded per B.2 of [RFC3454] : U+2110 1926 target[limit++] = 0x0069; 1927 break; 1928 1929 case 0x2111: 1930 // For case ignore, numeric, and stored prefix string matching rules, 1931 // characters are case folded per B.2 of [RFC3454] : U+2111 1932 target[limit++] = 0x0069; 1933 break; 1934 1935 case 0x2112: 1936 // For case ignore, numeric, and stored prefix string matching rules, 1937 // characters are case folded per B.2 of [RFC3454] : U+2112 1938 target[limit++] = 0x006C; 1939 break; 1940 1941 case 0x2115: 1942 // For case ignore, numeric, and stored prefix string matching rules, 1943 // characters are case folded per B.2 of [RFC3454] : U+2115 1944 target[limit++] = 0x006E; 1945 break; 1946 1947 case 0x2116: 1948 // For case ignore, numeric, and stored prefix string matching rules, 1949 // characters are case folded per B.2 of [RFC3454] : U+2116 1950 target[limit++] = 0x006E; 1951 target[limit++] = 0x006F; 1952 break; 1953 1954 case 0x2119: 1955 case 0x211A: 1956 case 0x211B: 1957 // For case ignore, numeric, and stored prefix string matching rules, 1958 // characters are case folded per B.2 of [RFC3454] : U+2119-U+211B 1959 target[limit++] 
= ( char ) ( c - 0x2A09 ); 1960 break; 1961 1962 case 0x211C: 1963 // For case ignore, numeric, and stored prefix string matching rules, 1964 // characters are case folded per B.2 of [RFC3454] : U+211C 1965 target[limit++] = 0x0072; 1966 break; 1967 1968 case 0x211D: 1969 // For case ignore, numeric, and stored prefix string matching rules, 1970 // characters are case folded per B.2 of [RFC3454] : U+211D 1971 target[limit++] = 0x0072; 1972 break; 1973 1974 case 0x2120: 1975 // For case ignore, numeric, and stored prefix string matching rules, 1976 // characters are case folded per B.2 of [RFC3454] : U+2120 1977 target[limit++] = 0x0073; 1978 target[limit++] = 0x006D; 1979 break; 1980 1981 case 0x2121: 1982 // For case ignore, numeric, and stored prefix string matching rules, 1983 // characters are case folded per B.2 of [RFC3454] : U+2121 1984 target[limit++] = 0x0074; 1985 target[limit++] = 0x0065; 1986 target[limit++] = 0x006C; 1987 break; 1988 1989 case 0x2122: 1990 // For case ignore, numeric, and stored prefix string matching rules, 1991 // characters are case folded per B.2 of [RFC3454] : U+2122 1992 target[limit++] = 0x0074; 1993 target[limit++] = 0x006D; 1994 break; 1995 1996 case 0x2124: 1997 // For case ignore, numeric, and stored prefix string matching rules, 1998 // characters are case folded per B.2 of [RFC3454] : U+2122 1999 target[limit++] = 0x007A; 2000 break; 2001 2002 case 0x2126: 2003 // For case ignore, numeric, and stored prefix string matching rules, 2004 // characters are case folded per B.2 of [RFC3454] : U+2122 2005 target[limit++] = 0x03C9; 2006 break; 2007 2008 case 0x2128: 2009 // For case ignore, numeric, and stored prefix string matching rules, 2010 // characters are case folded per B.2 of [RFC3454] : U+2122 2011 target[limit++] = 0x007A; 2012 break; 2013 2014 case 0x212A: 2015 // For case ignore, numeric, and stored prefix string matching rules, 2016 // characters are case folded per B.2 of [RFC3454] : U+2122 2017 target[limit++] = 
0x006B; 2018 break; 2019 2020 case 0x212B: 2021 // For case ignore, numeric, and stored prefix string matching rules, 2022 // characters are case folded per B.2 of [RFC3454] : U+2122 2023 target[limit++] = 0x00E5; 2024 break; 2025 2026 case 0x212C: 2027 case 0x212D: 2028 // For case ignore, numeric, and stored prefix string matching rules, 2029 // characters are case folded per B.2 of [RFC3454] : U+212C-U+212D 2030 target[limit++] = ( char ) ( c - 0x20CA ); 2031 break; 2032 2033 case 0x2130: 2034 case 0x2131: 2035 // For case ignore, numeric, and stored prefix string matching rules, 2036 // characters are case folded per B.2 of [RFC3454] : U+2130-U+2131 2037 target[limit++] = ( char ) ( c - 0x20CB ); 2038 break; 2039 2040 case 0x2133: 2041 // For case ignore, numeric, and stored prefix string matching rules, 2042 // characters are case folded per B.2 of [RFC3454] : U+2133 2043 target[limit++] = 0x006D; 2044 break; 2045 2046 case 0x213E: 2047 // For case ignore, numeric, and stored prefix string matching rules, 2048 // characters are case folded per B.2 of [RFC3454] : U+213E 2049 target[limit++] = 0x03B3; 2050 break; 2051 2052 case 0x213F: 2053 // For case ignore, numeric, and stored prefix string matching rules, 2054 // characters are case folded per B.2 of [RFC3454] : U+213F 2055 target[limit++] = 0x03C0; 2056 break; 2057 2058 case 0x2145: 2059 // For case ignore, numeric, and stored prefix string matching rules, 2060 // characters are case folded per B.2 of [RFC3454] : U+2145 2061 target[limit++] = 0x0064; 2062 break; 2063 2064 case 0x2160: 2065 case 0x2161: 2066 case 0x2162: 2067 case 0x2163: 2068 case 0x2164: 2069 case 0x2165: 2070 case 0x2166: 2071 case 0x2167: 2072 case 0x2168: 2073 case 0x2169: 2074 case 0x216A: 2075 case 0x216B: 2076 case 0x216C: 2077 case 0x216D: 2078 case 0x216E: 2079 case 0x216F: 2080 // For case ignore, numeric, and stored prefix string matching rules, 2081 // characters are case folded per B.2 of [RFC3454] : U+2160-U+216F 2082 
target[limit++] = ( char ) ( c + 0x0010 ); 2083 break; 2084 2085 case 0x24B6: 2086 case 0x24B7: 2087 case 0x24B8: 2088 case 0x24B9: 2089 case 0x24BA: 2090 case 0x24BB: 2091 case 0x24BC: 2092 case 0x24BD: 2093 case 0x24BE: 2094 case 0x24BF: 2095 case 0x24C0: 2096 case 0x24C1: 2097 case 0x24C2: 2098 case 0x24C3: 2099 case 0x24C4: 2100 case 0x24C5: 2101 case 0x24C6: 2102 case 0x24C7: 2103 case 0x24C8: 2104 case 0x24C9: 2105 case 0x24CA: 2106 case 0x24CB: 2107 case 0x24CC: 2108 case 0x24CD: 2109 case 0x24CE: 2110 case 0x24CF: 2111 // For case ignore, numeric, and stored prefix string matching rules, 2112 // characters are case folded per B.2 of [RFC3454] : U+24B6-U+24CF 2113 target[limit++] = ( char ) ( c + 0x001A ); 2114 break; 2115 2116 case 0x3000: 2117 // All other code points with Separator (space, line, or paragraph) property 2118 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 2119 // list of these code points: ...3000. 2120 target[limit++] = 0x0020; 2121 break; 2122 2123 case 0x3371: 2124 // For case ignore, numeric, and stored prefix string matching rules, 2125 // characters are case folded per B.2 of [RFC3454] : U+3371 2126 target[limit++] = 0x0068; 2127 target[limit++] = 0x0070; 2128 target[limit++] = 0x0061; 2129 break; 2130 2131 case 0x3373: 2132 // For case ignore, numeric, and stored prefix string matching rules, 2133 // characters are case folded per B.2 of [RFC3454] : U+3373 2134 target[limit++] = 0x0061; 2135 target[limit++] = 0x0075; 2136 break; 2137 2138 case 0x3375: 2139 // For case ignore, numeric, and stored prefix string matching rules, 2140 // characters are case folded per B.2 of [RFC3454] : U+3375 2141 target[limit++] = 0x006F; 2142 target[limit++] = 0x0076; 2143 break; 2144 2145 case 0x3380: 2146 // For case ignore, numeric, and stored prefix string matching rules, 2147 // characters are case folded per B.2 of [RFC3454] : U+3380 2148 target[limit++] = 0x0070; 2149 target[limit++] = 0x0061; 2150 break; 2151 
2152 case 0x3381: 2153 // For case ignore, numeric, and stored prefix string matching rules, 2154 // characters are case folded per B.2 of [RFC3454] : U+3381 2155 target[limit++] = 0x006E; 2156 target[limit++] = 0x0061; 2157 break; 2158 2159 case 0x3382: 2160 // For case ignore, numeric, and stored prefix string matching rules, 2161 // characters are case folded per B.2 of [RFC3454] : U+3382 2162 target[limit++] = 0x03BC; 2163 target[limit++] = 0x0061; 2164 break; 2165 2166 case 0x3383: 2167 // For case ignore, numeric, and stored prefix string matching rules, 2168 // characters are case folded per B.2 of [RFC3454] : U+3383 2169 target[limit++] = 0x006D; 2170 target[limit++] = 0x0061; 2171 break; 2172 2173 case 0x3384: 2174 // For case ignore, numeric, and stored prefix string matching rules, 2175 // characters are case folded per B.2 of [RFC3454] : U+3384 2176 target[limit++] = 0x006B; 2177 target[limit++] = 0x0061; 2178 break; 2179 2180 case 0x3385: 2181 // For case ignore, numeric, and stored prefix string matching rules, 2182 // characters are case folded per B.2 of [RFC3454] : U+3385 2183 target[limit++] = 0x006B; 2184 target[limit++] = 0x0062; 2185 break; 2186 2187 case 0x3386: 2188 // For case ignore, numeric, and stored prefix string matching rules, 2189 // characters are case folded per B.2 of [RFC3454] : U+3386 2190 target[limit++] = 0x006D; 2191 target[limit++] = 0x0062; 2192 break; 2193 2194 case 0x3387: 2195 // For case ignore, numeric, and stored prefix string matching rules, 2196 // characters are case folded per B.2 of [RFC3454] : U+3387 2197 target[limit++] = 0x0067; 2198 target[limit++] = 0x0062; 2199 break; 2200 2201 case 0x338A: 2202 // For case ignore, numeric, and stored prefix string matching rules, 2203 // characters are case folded per B.2 of [RFC3454] : U+338A 2204 target[limit++] = 0x0070; 2205 target[limit++] = 0x0066; 2206 break; 2207 2208 case 0x338B: 2209 // For case ignore, numeric, and stored prefix string matching rules, 2210 // 
characters are case folded per B.2 of [RFC3454] : U+338B 2211 target[limit++] = 0x006E; 2212 target[limit++] = 0x0066; 2213 break; 2214 2215 case 0x338C: 2216 // For case ignore, numeric, and stored prefix string matching rules, 2217 // characters are case folded per B.2 of [RFC3454] : U+338C 2218 target[limit++] = 0x03BC; 2219 target[limit++] = 0x0066; 2220 break; 2221 2222 case 0x3390: 2223 // For case ignore, numeric, and stored prefix string matching rules, 2224 // characters are case folded per B.2 of [RFC3454] : U+3390 2225 target[limit++] = 0x0068; 2226 target[limit++] = 0x007A; 2227 break; 2228 2229 case 0x3391: 2230 // For case ignore, numeric, and stored prefix string matching rules, 2231 // characters are case folded per B.2 of [RFC3454] : U+3391 2232 target[limit++] = 0x006B; 2233 target[limit++] = 0x0068; 2234 target[limit++] = 0x007A; 2235 break; 2236 2237 case 0x3392: 2238 // For case ignore, numeric, and stored prefix string matching rules, 2239 // characters are case folded per B.2 of [RFC3454] : U+3392 2240 target[limit++] = 0x006D; 2241 target[limit++] = 0x0068; 2242 target[limit++] = 0x007A; 2243 break; 2244 2245 case 0x3393: 2246 // For case ignore, numeric, and stored prefix string matching rules, 2247 // characters are case folded per B.2 of [RFC3454] : U+3393 2248 target[limit++] = 0x0067; 2249 target[limit++] = 0x0068; 2250 target[limit++] = 0x007A; 2251 break; 2252 2253 case 0x3394: 2254 // For case ignore, numeric, and stored prefix string matching rules, 2255 // characters are case folded per B.2 of [RFC3454] : U+3394 2256 target[limit++] = 0x0074; 2257 target[limit++] = 0x0068; 2258 target[limit++] = 0x007A; 2259 break; 2260 2261 case 0x33A9: 2262 // For case ignore, numeric, and stored prefix string matching rules, 2263 // characters are case folded per B.2 of [RFC3454] : U+33A9 2264 target[limit++] = 0x0070; 2265 target[limit++] = 0x0061; 2266 break; 2267 2268 case 0x33AA: 2269 // For case ignore, numeric, and stored prefix string 
matching rules, 2270 // characters are case folded per B.2 of [RFC3454] : U+33AA 2271 target[limit++] = 0x006B; 2272 target[limit++] = 0x0070; 2273 target[limit++] = 0x0061; 2274 break; 2275 2276 case 0x33AB: 2277 // For case ignore, numeric, and stored prefix string matching rules, 2278 // characters are case folded per B.2 of [RFC3454] : U+33AB 2279 target[limit++] = 0x006D; 2280 target[limit++] = 0x0070; 2281 target[limit++] = 0x0061; 2282 break; 2283 2284 case 0x33AC: 2285 // For case ignore, numeric, and stored prefix string matching rules, 2286 // characters are case folded per B.2 of [RFC3454] : U+33AC 2287 target[limit++] = 0x0067; 2288 target[limit++] = 0x0070; 2289 target[limit++] = 0x0061; 2290 break; 2291 2292 case 0x33B4: 2293 // For case ignore, numeric, and stored prefix string matching rules, 2294 // characters are case folded per B.2 of [RFC3454] : U+33B4 2295 target[limit++] = 0x0070; 2296 target[limit++] = 0x0076; 2297 break; 2298 2299 case 0x33B5: 2300 // For case ignore, numeric, and stored prefix string matching rules, 2301 // characters are case folded per B.2 of [RFC3454] : U+33B5 2302 target[limit++] = 0x006E; 2303 target[limit++] = 0x0076; 2304 break; 2305 2306 case 0x33B6: 2307 // For case ignore, numeric, and stored prefix string matching rules, 2308 // characters are case folded per B.2 of [RFC3454] : U+33B6 2309 target[limit++] = 0x03BC; 2310 target[limit++] = 0x0076; 2311 break; 2312 2313 case 0x33B7: 2314 // For case ignore, numeric, and stored prefix string matching rules, 2315 // characters are case folded per B.2 of [RFC3454] : U+33B7 2316 target[limit++] = 0x006D; 2317 target[limit++] = 0x0076; 2318 break; 2319 2320 case 0x33B8: 2321 // For case ignore, numeric, and stored prefix string matching rules, 2322 // characters are case folded per B.2 of [RFC3454] : U+33B8 2323 target[limit++] = 0x006B; 2324 target[limit++] = 0x0076; 2325 break; 2326 2327 case 0x33B9: 2328 // For case ignore, numeric, and stored prefix string matching 
rules, 2329 // characters are case folded per B.2 of [RFC3454] : U+33B9 2330 target[limit++] = 0x006D; 2331 target[limit++] = 0x0076; 2332 break; 2333 2334 case 0x33BA: 2335 // For case ignore, numeric, and stored prefix string matching rules, 2336 // characters are case folded per B.2 of [RFC3454] : U+33BA 2337 target[limit++] = 0x0070; 2338 target[limit++] = 0x0077; 2339 break; 2340 2341 case 0x33BB: 2342 // For case ignore, numeric, and stored prefix string matching rules, 2343 // characters are case folded per B.2 of [RFC3454] : U+33BB 2344 target[limit++] = 0x006E; 2345 target[limit++] = 0x0077; 2346 break; 2347 2348 case 0x33BC: 2349 // For case ignore, numeric, and stored prefix string matching rules, 2350 // characters are case folded per B.2 of [RFC3454] : U+33BC 2351 target[limit++] = 0x03BC; 2352 target[limit++] = 0x0077; 2353 break; 2354 2355 case 0x33BD: 2356 // For case ignore, numeric, and stored prefix string matching rules, 2357 // characters are case folded per B.2 of [RFC3454] : U+33BD 2358 target[limit++] = 0x006D; 2359 target[limit++] = 0x0077; 2360 break; 2361 2362 case 0x33BE: 2363 // For case ignore, numeric, and stored prefix string matching rules, 2364 // characters are case folded per B.2 of [RFC3454] : U+33BE 2365 target[limit++] = 0x006B; 2366 target[limit++] = 0x0077; 2367 break; 2368 2369 case 0x33BF: 2370 // For case ignore, numeric, and stored prefix string matching rules, 2371 // characters are case folded per B.2 of [RFC3454] : U+33BF 2372 target[limit++] = 0x006D; 2373 target[limit++] = 0x0077; 2374 break; 2375 2376 case 0x33C0: 2377 // For case ignore, numeric, and stored prefix string matching rules, 2378 // characters are case folded per B.2 of [RFC3454] : U+33C0 2379 target[limit++] = 0x006B; 2380 target[limit++] = 0x03C9; 2381 break; 2382 2383 case 0x33C1: 2384 // For case ignore, numeric, and stored prefix string matching rules, 2385 // characters are case folded per B.2 of [RFC3454] : U+33C1 2386 target[limit++] = 0x006D; 
2387 target[limit++] = 0x03C9; 2388 break; 2389 2390 case 0x33C3: 2391 // For case ignore, numeric, and stored prefix string matching rules, 2392 // characters are case folded per B.2 of [RFC3454] : U+33C3 2393 target[limit++] = 0x0062; 2394 target[limit++] = 0x0071; 2395 break; 2396 2397 case 0x33C6: 2398 // For case ignore, numeric, and stored prefix string matching rules, 2399 // characters are case folded per B.2 of [RFC3454] : U+33C6 2400 target[limit++] = 0x0063; 2401 target[limit++] = 0x2215; 2402 target[limit++] = 0x006B; 2403 target[limit++] = 0x0067; 2404 break; 2405 2406 case 0x33C7: 2407 // For case ignore, numeric, and stored prefix string matching rules, 2408 // characters are case folded per B.2 of [RFC3454] : U+33C7 2409 target[limit++] = 0x0063; 2410 target[limit++] = 0x006F; 2411 target[limit++] = 0x002E; 2412 break; 2413 2414 case 0x33C8: 2415 // For case ignore, numeric, and stored prefix string matching rules, 2416 // characters are case folded per B.2 of [RFC3454] : U+33C8 2417 target[limit++] = 0x0064; 2418 target[limit++] = 0x0062; 2419 break; 2420 2421 case 0x33C9: 2422 // For case ignore, numeric, and stored prefix string matching rules, 2423 // characters are case folded per B.2 of [RFC3454] : U+33C9 2424 target[limit++] = 0x0067; 2425 target[limit++] = 0x0079; 2426 break; 2427 2428 case 0x33CB: 2429 // For case ignore, numeric, and stored prefix string matching rules, 2430 // characters are case folded per B.2 of [RFC3454] : U+33CB 2431 target[limit++] = 0x0068; 2432 target[limit++] = 0x0070; 2433 break; 2434 2435 case 0x33CD: 2436 // For case ignore, numeric, and stored prefix string matching rules, 2437 // characters are case folded per B.2 of [RFC3454] : U+33CD 2438 target[limit++] = 0x006B; 2439 target[limit++] = 0x006B; 2440 break; 2441 2442 case 0x33CE: 2443 // For case ignore, numeric, and stored prefix string matching rules, 2444 // characters are case folded per B.2 of [RFC3454] : U+33CE 2445 target[limit++] = 0x006B; 2446 
target[limit++] = 0x006D; 2447 break; 2448 2449 case 0x33D7: 2450 // For case ignore, numeric, and stored prefix string matching rules, 2451 // characters are case folded per B.2 of [RFC3454] : U+33D7 2452 target[limit++] = 0x0070; 2453 target[limit++] = 0x0068; 2454 break; 2455 2456 case 0x33D9: 2457 // For case ignore, numeric, and stored prefix string matching rules, 2458 // characters are case folded per B.2 of [RFC3454] : U+33D9 2459 target[limit++] = 0x0070; 2460 target[limit++] = 0x0070; 2461 target[limit++] = 0x006D; 2462 break; 2463 2464 case 0x33DA: 2465 // For case ignore, numeric, and stored prefix string matching rules, 2466 // characters are case folded per B.2 of [RFC3454] : U+33DA 2467 target[limit++] = 0x0070; 2468 target[limit++] = 0x0072; 2469 break; 2470 2471 case 0x33DC: 2472 // For case ignore, numeric, and stored prefix string matching rules, 2473 // characters are case folded per B.2 of [RFC3454] : U+33DC 2474 target[limit++] = 0x0073; 2475 target[limit++] = 0x0076; 2476 break; 2477 2478 case 0x33DD: 2479 // For case ignore, numeric, and stored prefix string matching rules, 2480 // characters are case folded per B.2 of [RFC3454] : U+33DD 2481 target[limit++] = 0x0077; 2482 target[limit++] = 0x0062; 2483 break; 2484 2485 case 0xFB00: 2486 // For case ignore, numeric, and stored prefix string matching rules, 2487 // characters are case folded per B.2 of [RFC3454] : U+FB00 2488 target[limit++] = 0x0066; 2489 target[limit++] = 0x0066; 2490 break; 2491 2492 case 0xFB01: 2493 // For case ignore, numeric, and stored prefix string matching rules, 2494 // characters are case folded per B.2 of [RFC3454] : U+FB01 2495 target[limit++] = 0x0066; 2496 target[limit++] = 0x0069; 2497 break; 2498 2499 case 0xFB02: 2500 // For case ignore, numeric, and stored prefix string matching rules, 2501 // characters are case folded per B.2 of [RFC3454] : U+FB02 2502 target[limit++] = 0x0066; 2503 target[limit++] = 0x006C; 2504 break; 2505 2506 case 0xFB03: 2507 // For 
case ignore, numeric, and stored prefix string matching rules, 2508 // characters are case folded per B.2 of [RFC3454] : U+FB03 2509 target[limit++] = 0x0066; 2510 target[limit++] = 0x0066; 2511 target[limit++] = 0x0069; 2512 break; 2513 2514 case 0xFB04: 2515 // For case ignore, numeric, and stored prefix string matching rules, 2516 // characters are case folded per B.2 of [RFC3454] : U+FB04 2517 target[limit++] = 0x0066; 2518 target[limit++] = 0x0066; 2519 target[limit++] = 0x006C; 2520 break; 2521 2522 case 0xFB05: 2523 // For case ignore, numeric, and stored prefix string matching rules, 2524 // characters are case folded per B.2 of [RFC3454] : U+FB05 2525 target[limit++] = 0x0073; 2526 target[limit++] = 0x0074; 2527 break; 2528 2529 case 0xFB06: 2530 // For case ignore, numeric, and stored prefix string matching rules, 2531 // characters are case folded per B.2 of [RFC3454] : U+FB06 2532 target[limit++] = 0x0073; 2533 target[limit++] = 0x0074; 2534 break; 2535 2536 case 0xFB13: 2537 // For case ignore, numeric, and stored prefix string matching rules, 2538 // characters are case folded per B.2 of [RFC3454] : U+FB13 2539 target[limit++] = 0x0574; 2540 target[limit++] = 0x0576; 2541 break; 2542 2543 case 0xFB14: 2544 // For case ignore, numeric, and stored prefix string matching rules, 2545 // characters are case folded per B.2 of [RFC3454] : U+FB14 2546 target[limit++] = 0x0574; 2547 target[limit++] = 0x0565; 2548 break; 2549 2550 case 0xFB15: 2551 // For case ignore, numeric, and stored prefix string matching rules, 2552 // characters are case folded per B.2 of [RFC3454] : U+FB15 2553 target[limit++] = 0x0574; 2554 target[limit++] = 0x056B; 2555 break; 2556 2557 case 0xFB16: 2558 // For case ignore, numeric, and stored prefix string matching rules, 2559 // characters are case folded per B.2 of [RFC3454] : U+FB16 2560 target[limit++] = 0x057E; 2561 target[limit++] = 0x0576; 2562 break; 2563 2564 case 0xFB17: 2565 // For case ignore, numeric, and stored prefix 
string matching rules, 2566 // characters are case folded per B.2 of [RFC3454] : U+FB17 2567 target[limit++] = 0x0574; 2568 target[limit++] = 0x056D; 2569 break; 2570 2571 case 0xFE00: 2572 case 0xFE01: 2573 case 0xFE02: 2574 case 0xFE03: 2575 case 0xFE04: 2576 case 0xFE05: 2577 case 0xFE06: 2578 case 0xFE07: 2579 case 0xFE08: 2580 case 0xFE09: 2581 case 0xFE0A: 2582 case 0xFE0B: 2583 case 0xFE0C: 2584 case 0xFE0D: 2585 case 0xFE0E: 2586 case 0xFE0F: 2587 // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 2588 // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and 2589 // VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also 2590 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 2591 // mapped to nothing. 2592 break; 2593 2594 case 0xFEFF: 2595 // All other control code (e.g., Cc) points or code points with a 2596 // control function (e.g., Cf) are mapped to nothing. The following is 2597 // a complete list of these code points: ... U+FEFF... 2598 break; 2599 2600 case 0xFF21: 2601 case 0xFF22: 2602 case 0xFF23: 2603 case 0xFF24: 2604 case 0xFF25: 2605 case 0xFF26: 2606 case 0xFF27: 2607 case 0xFF28: 2608 case 0xFF29: 2609 case 0xFF2A: 2610 case 0xFF2B: 2611 case 0xFF2C: 2612 case 0xFF2D: 2613 case 0xFF2E: 2614 case 0xFF2F: 2615 case 0xFF30: 2616 case 0xFF31: 2617 case 0xFF32: 2618 case 0xFF33: 2619 case 0xFF34: 2620 case 0xFF35: 2621 case 0xFF36: 2622 case 0xFF37: 2623 case 0xFF38: 2624 case 0xFF39: 2625 case 0xFF3A: 2626 // For case ignore, numeric, and stored prefix string matching rules, 2627 // characters are case folded per B.2 of [RFC3454] : U+FF21-FF3A 2628 target[limit++] = ( char ) ( c + 0x0020 ); 2629 break; 2630 2631 case 0xFFF9: 2632 case 0xFFFA: 2633 case 0xFFFB: 2634 // All other control code (e.g., Cc) points or code points with a 2635 // control function (e.g., Cf) are mapped to nothing. The following is 2636 // a complete list of these code points: ... U+FFF9-FFFB... 
2637 break; 2638 2639 case 0xFFFC: 2640 // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 2641 // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and 2642 // VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also 2643 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 2644 // mapped to nothing. 2645 break; 2646 2647 default: 2648 // First, eliminate surrogates, and replace them by FFFD char 2649 if ( ( c >= 0xD800 ) && ( c <= 0xDFFF ) ) 2650 { 2651 target[limit++] = 0xFFFD; 2652 break; 2653 } 2654 2655 target[limit++] = c; 2656 break; 2657 } 2658 } 2659 2660 return new String( target, 0, limit ); 2661 } 2662 2663 2664 /** 2665 * Check that the String does not contain any prohibited char 2666 * 2667 * @param value The String to analyze 2668 * @throws InvalidCharacterException If any character is prohibited 2669 */ 2670 public static void checkProhibited( char[] value ) throws InvalidCharacterException 2671 { 2672 for ( char c : value ) 2673 { 2674 checkProhibited( c ); 2675 } 2676 } 2677 2678 /** 2679 * 2680 * Prohibit characters described in RFC 4518 : 2681 * - Table A.1 of RFC 3454 2682 * - Table C.3 of RFC 3454 2683 * - Table C.4 of RFC 3454 2684 * - Table C.5 of RFC 3454 2685 * - Table C.8 of RFC 3454 2686 * - character U-FFFD 2687 * 2688 * @param c The char to analyze 2689 * @throws InvalidCharacterException If any character is prohibited 2690 */ 2691 private static void checkProhibited( char c ) throws InvalidCharacterException 2692 { 2693 // Shortcut chars above 0x0221 2694 if ( c < 0x221 ) 2695 { 2696 return; 2697 } 2698 2699 // RFC 3454, Table A.1 2700 switch ( c ) 2701 { 2702 case 0x0221: 2703 case 0x038B: 2704 case 0x038D: 2705 case 0x03A2: 2706 case 0x03CF: 2707 case 0x0487: 2708 case 0x04CF: 2709 case 0x0560: 2710 case 0x0588: 2711 case 0x05A2: 2712 case 0x05BA: 2713 case 0x0620: 2714 case 0x06FF: 2715 case 0x070E: 2716 case 0x0904: 2717 case 0x0984: 2718 case 0x09A9: 2719 case 0x09B1: 2720 
case 0x09BD: 2721 case 0x09DE: 2722 case 0x0A29: 2723 case 0x0A31: 2724 case 0x0A34: 2725 case 0x0A37: 2726 case 0x0A3D: 2727 case 0x0A5D: 2728 case 0x0A84: 2729 case 0x0A8C: 2730 case 0x0A8E: 2731 case 0x0A92: 2732 case 0x0AA9: 2733 case 0x0AB1: 2734 case 0x0AB4: 2735 case 0x0AC6: 2736 case 0x0ACA: 2737 case 0x0B04: 2738 case 0x0B29: 2739 case 0x0B31: 2740 case 0x0B5E: 2741 case 0x0B84: 2742 case 0x0B91: 2743 case 0x0B9B: 2744 case 0x0B9D: 2745 case 0x0BB6: 2746 case 0x0BC9: 2747 case 0x0C04: 2748 case 0x0C0D: 2749 case 0x0C11: 2750 case 0x0C29: 2751 case 0x0C34: 2752 case 0x0C45: 2753 case 0x0C49: 2754 case 0x0C84: 2755 case 0x0C8D: 2756 case 0x0C91: 2757 case 0x0CA9: 2758 case 0x0CB4: 2759 case 0x0CC5: 2760 case 0x0CC9: 2761 case 0x0CDF: 2762 case 0x0D04: 2763 case 0x0D0D: 2764 case 0x0D11: 2765 case 0x0D29: 2766 case 0x0D49: 2767 case 0x0D84: 2768 case 0x0DB2: 2769 case 0x0DBC: 2770 case 0x0DD5: 2771 case 0x0DD7: 2772 case 0x0E83: 2773 case 0x0E89: 2774 case 0x0E98: 2775 case 0x0EA0: 2776 case 0x0EA4: 2777 case 0x0EA6: 2778 case 0x0EAC: 2779 case 0x0EBA: 2780 case 0x0EC5: 2781 case 0x0EC7: 2782 case 0x0F48: 2783 case 0x0F98: 2784 case 0x0FBD: 2785 case 0x1022: 2786 case 0x1028: 2787 case 0x102B: 2788 case 0x1207: 2789 case 0x1247: 2790 case 0x1249: 2791 case 0x1257: 2792 case 0x1259: 2793 case 0x1287: 2794 case 0x1289: 2795 case 0x12AF: 2796 case 0x12B1: 2797 case 0x12BF: 2798 case 0x12C1: 2799 case 0x12CF: 2800 case 0x12D7: 2801 case 0x12EF: 2802 case 0x130F: 2803 case 0x1311: 2804 case 0x131F: 2805 case 0x1347: 2806 case 0x170D: 2807 case 0x176D: 2808 case 0x1771: 2809 case 0x180F: 2810 case 0x1F58: 2811 case 0x1F5A: 2812 case 0x1F5C: 2813 case 0x1F5E: 2814 case 0x1FB5: 2815 case 0x1FC5: 2816 case 0x1FDC: 2817 case 0x1FF5: 2818 case 0x1FFF: 2819 case 0x24FF: 2820 case 0x2618: 2821 case 0x2705: 2822 case 0x2728: 2823 case 0x274C: 2824 case 0x274E: 2825 case 0x2757: 2826 case 0x27B0: 2827 case 0x2E9A: 2828 case 0x3040: 2829 case 0x318F: 2830 case 0x32FF: 2831 
case 0x33FF: 2832 case 0xFB37: 2833 case 0xFB3D: 2834 case 0xFB3F: 2835 case 0xFB42: 2836 case 0xFB45: 2837 case 0xFE53: 2838 case 0xFE67: 2839 case 0xFE75: 2840 case 0xFF00: 2841 case 0xFFE7: 2842 throw new InvalidCharacterException( c ); 2843 default: 2844 break; 2845 } 2846 2847 // RFC 3454, Table A.1, intervals 2848 if ( ( c >= 0x0234 ) && ( c <= 0x024F ) ) 2849 { 2850 throw new InvalidCharacterException( c ); 2851 } 2852 2853 if ( ( c >= 0x02AE ) && ( c <= 0x02AF ) ) 2854 { 2855 throw new InvalidCharacterException( c ); 2856 } 2857 2858 if ( ( c >= 0x02EF ) && ( c <= 0x02FF ) ) 2859 { 2860 throw new InvalidCharacterException( c ); 2861 } 2862 2863 if ( ( c >= 0x0350 ) && ( c <= 0x035F ) ) 2864 { 2865 throw new InvalidCharacterException( c ); 2866 } 2867 2868 if ( ( c >= 0x0370 ) && ( c <= 0x0373 ) ) 2869 { 2870 throw new InvalidCharacterException( c ); 2871 } 2872 2873 if ( ( c >= 0x0376 ) && ( c <= 0x0379 ) ) 2874 { 2875 throw new InvalidCharacterException( c ); 2876 } 2877 2878 if ( ( c >= 0x037B ) && ( c <= 0x037D ) ) 2879 { 2880 throw new InvalidCharacterException( c ); 2881 } 2882 2883 if ( ( c >= 0x037F ) && ( c <= 0x0383 ) ) 2884 { 2885 throw new InvalidCharacterException( c ); 2886 } 2887 2888 if ( ( c >= 0x03F7 ) && ( c <= 0x03FF ) ) 2889 { 2890 throw new InvalidCharacterException( c ); 2891 } 2892 2893 if ( ( c >= 0x04F6 ) && ( c <= 0x04F7 ) ) 2894 { 2895 throw new InvalidCharacterException( c ); 2896 } 2897 2898 if ( ( c >= 0x04FA ) && ( c <= 0x04FF ) ) 2899 { 2900 throw new InvalidCharacterException( c ); 2901 } 2902 2903 if ( ( c >= 0x0510 ) && ( c <= 0x0530 ) ) 2904 { 2905 throw new InvalidCharacterException( c ); 2906 } 2907 2908 if ( ( c >= 0x0557 ) && ( c <= 0x0558 ) ) 2909 { 2910 throw new InvalidCharacterException( c ); 2911 } 2912 2913 if ( ( c >= 0x058B ) && ( c <= 0x0590 ) ) 2914 { 2915 throw new InvalidCharacterException( c ); 2916 } 2917 2918 if ( ( c >= 0x05C5 ) && ( c <= 0x05CF ) ) 2919 { 2920 throw new InvalidCharacterException( c ); 
2921 } 2922 2923 if ( ( c >= 0x05EB ) && ( c <= 0x05EF ) ) 2924 { 2925 throw new InvalidCharacterException( c ); 2926 } 2927 2928 if ( ( c >= 0x05F5 ) && ( c <= 0x060B ) ) 2929 { 2930 throw new InvalidCharacterException( c ); 2931 } 2932 2933 if ( ( c >= 0x060D ) && ( c <= 0x061A ) ) 2934 { 2935 throw new InvalidCharacterException( c ); 2936 } 2937 2938 if ( ( c >= 0x061C ) && ( c <= 0x061E ) ) 2939 { 2940 throw new InvalidCharacterException( c ); 2941 } 2942 2943 if ( ( c >= 0x063B ) && ( c <= 0x063F ) ) 2944 { 2945 throw new InvalidCharacterException( c ); 2946 } 2947 2948 if ( ( c >= 0x0656 ) && ( c <= 0x065F ) ) 2949 { 2950 throw new InvalidCharacterException( c ); 2951 } 2952 2953 if ( ( c >= 0x06EE ) && ( c <= 0x06EF ) ) 2954 { 2955 throw new InvalidCharacterException( c ); 2956 } 2957 2958 if ( ( c >= 0x072D ) && ( c <= 0x072F ) ) 2959 { 2960 throw new InvalidCharacterException( c ); 2961 } 2962 2963 if ( ( c >= 0x074B ) && ( c <= 0x077F ) ) 2964 { 2965 throw new InvalidCharacterException( c ); 2966 } 2967 2968 if ( ( c >= 0x07B2 ) && ( c <= 0x0900 ) ) 2969 { 2970 throw new InvalidCharacterException( c ); 2971 } 2972 2973 if ( ( c >= 0x093A ) && ( c <= 0x093B ) ) 2974 { 2975 throw new InvalidCharacterException( c ); 2976 } 2977 2978 if ( ( c >= 0x094E ) && ( c <= 0x094F ) ) 2979 { 2980 throw new InvalidCharacterException( c ); 2981 } 2982 2983 if ( ( c >= 0x0955 ) && ( c <= 0x0957 ) ) 2984 { 2985 throw new InvalidCharacterException( c ); 2986 } 2987 2988 if ( ( c >= 0x0971 ) && ( c <= 0x0980 ) ) 2989 { 2990 throw new InvalidCharacterException( c ); 2991 } 2992 2993 if ( ( c >= 0x098D ) && ( c <= 0x098E ) ) 2994 { 2995 throw new InvalidCharacterException( c ); 2996 } 2997 2998 if ( ( c >= 0x0991 ) && ( c <= 0x0992 ) ) 2999 { 3000 throw new InvalidCharacterException( c ); 3001 } 3002 3003 if ( ( c >= 0x09B3 ) && ( c <= 0x09B5 ) ) 3004 { 3005 throw new InvalidCharacterException( c ); 3006 } 3007 3008 if ( ( c >= 0x09BA ) && ( c <= 0x09BB ) ) 3009 { 3010 throw 
new InvalidCharacterException( c ); 3011 } 3012 3013 if ( ( c >= 0x09C5 ) && ( c <= 0x09C6 ) ) 3014 { 3015 throw new InvalidCharacterException( c ); 3016 } 3017 3018 if ( ( c >= 0x09C9 ) && ( c <= 0x09CA ) ) 3019 { 3020 throw new InvalidCharacterException( c ); 3021 } 3022 3023 if ( ( c >= 0x09CE ) && ( c <= 0x09D6 ) ) 3024 { 3025 throw new InvalidCharacterException( c ); 3026 } 3027 3028 if ( ( c >= 0x09D8 ) && ( c <= 0x09DB ) ) 3029 { 3030 throw new InvalidCharacterException( c ); 3031 } 3032 3033 if ( ( c >= 0x09E4 ) && ( c <= 0x09E5 ) ) 3034 { 3035 throw new InvalidCharacterException( c ); 3036 } 3037 3038 if ( ( c >= 0x09FB ) && ( c <= 0x0A01 ) ) 3039 { 3040 throw new InvalidCharacterException( c ); 3041 } 3042 3043 if ( ( c >= 0x0A03 ) && ( c <= 0x0A04 ) ) 3044 { 3045 throw new InvalidCharacterException( c ); 3046 } 3047 3048 if ( ( c >= 0x0A0B ) && ( c <= 0x0A0E ) ) 3049 { 3050 throw new InvalidCharacterException( c ); 3051 } 3052 3053 if ( ( c >= 0x0A11 ) && ( c <= 0x0A12 ) ) 3054 { 3055 throw new InvalidCharacterException( c ); 3056 } 3057 3058 if ( ( c >= 0x0A3A ) && ( c <= 0x0A3B ) ) 3059 { 3060 throw new InvalidCharacterException( c ); 3061 } 3062 3063 if ( ( c >= 0x0A43 ) && ( c <= 0x0A46 ) ) 3064 { 3065 throw new InvalidCharacterException( c ); 3066 } 3067 3068 if ( ( c >= 0x0A49 ) && ( c <= 0x0A4A ) ) 3069 { 3070 throw new InvalidCharacterException( c ); 3071 } 3072 3073 if ( ( c >= 0x0A4E ) && ( c <= 0x0A58 ) ) 3074 { 3075 throw new InvalidCharacterException( c ); 3076 } 3077 3078 if ( ( c >= 0x0A5F ) && ( c <= 0x0A65 ) ) 3079 { 3080 throw new InvalidCharacterException( c ); 3081 } 3082 3083 if ( ( c >= 0x0A75 ) && ( c <= 0x0A80 ) ) 3084 { 3085 throw new InvalidCharacterException( c ); 3086 } 3087 3088 if ( ( c >= 0x0ABA ) && ( c <= 0x0ABB ) ) 3089 { 3090 throw new InvalidCharacterException( c ); 3091 } 3092 3093 if ( ( c >= 0x0ACE ) && ( c <= 0x0ACF ) ) 3094 { 3095 throw new InvalidCharacterException( c ); 3096 } 3097 3098 if ( ( c >= 0x0AD1 ) && ( 
c <= 0x0ADF ) ) 3099 { 3100 throw new InvalidCharacterException( c ); 3101 } 3102 3103 if ( ( c >= 0x0AE1 ) && ( c <= 0x0AE5 ) ) 3104 { 3105 throw new InvalidCharacterException( c ); 3106 } 3107 3108 if ( ( c >= 0x0AF0 ) && ( c <= 0x0B00 ) ) 3109 { 3110 throw new InvalidCharacterException( c ); 3111 } 3112 3113 if ( ( c >= 0x0B0D ) && ( c <= 0x0B0E ) ) 3114 { 3115 throw new InvalidCharacterException( c ); 3116 } 3117 3118 if ( ( c >= 0x0B11 ) && ( c <= 0x0B12 ) ) 3119 { 3120 throw new InvalidCharacterException( c ); 3121 } 3122 3123 if ( ( c >= 0x0B34 ) && ( c <= 0x0B35 ) ) 3124 { 3125 throw new InvalidCharacterException( c ); 3126 } 3127 3128 if ( ( c >= 0x0B3A ) && ( c <= 0x0B3B ) ) 3129 { 3130 throw new InvalidCharacterException( c ); 3131 } 3132 3133 if ( ( c >= 0x0B44 ) && ( c <= 0x0B46 ) ) 3134 { 3135 throw new InvalidCharacterException( c ); 3136 } 3137 3138 if ( ( c >= 0x0B49 ) && ( c <= 0x0B4A ) ) 3139 { 3140 throw new InvalidCharacterException( c ); 3141 } 3142 3143 if ( ( c >= 0x0B4E ) && ( c <= 0x0B55 ) ) 3144 { 3145 throw new InvalidCharacterException( c ); 3146 } 3147 3148 if ( ( c >= 0x0B58 ) && ( c <= 0x0B5B ) ) 3149 { 3150 throw new InvalidCharacterException( c ); 3151 } 3152 3153 if ( ( c >= 0x0B62 ) && ( c <= 0x0B65 ) ) 3154 { 3155 throw new InvalidCharacterException( c ); 3156 } 3157 3158 if ( ( c >= 0x0B71 ) && ( c <= 0x0B81 ) ) 3159 { 3160 throw new InvalidCharacterException( c ); 3161 } 3162 3163 if ( ( c >= 0x0B8B ) && ( c <= 0x0B8D ) ) 3164 { 3165 throw new InvalidCharacterException( c ); 3166 } 3167 3168 if ( ( c >= 0x0B96 ) && ( c <= 0x0B98 ) ) 3169 { 3170 throw new InvalidCharacterException( c ); 3171 } 3172 3173 if ( ( c >= 0x0BA0 ) && ( c <= 0x0BA2 ) ) 3174 { 3175 throw new InvalidCharacterException( c ); 3176 } 3177 3178 if ( ( c >= 0x0BA5 ) && ( c <= 0x0BA7 ) ) 3179 { 3180 throw new InvalidCharacterException( c ); 3181 } 3182 3183 if ( ( c >= 0x0BAB ) && ( c <= 0x0BAD ) ) 3184 { 3185 throw new InvalidCharacterException( c ); 3186 } 
3187 3188 if ( ( c >= 0x0BBA ) && ( c <= 0x0BBD ) ) 3189 { 3190 throw new InvalidCharacterException( c ); 3191 } 3192 3193 if ( ( c >= 0x0BC3 ) && ( c <= 0x0BC5 ) ) 3194 { 3195 throw new InvalidCharacterException( c ); 3196 } 3197 3198 if ( ( c >= 0x0BCE ) && ( c <= 0x0BD6 ) ) 3199 { 3200 throw new InvalidCharacterException( c ); 3201 } 3202 3203 if ( ( c >= 0x0BD8 ) && ( c <= 0x0BE6 ) ) 3204 { 3205 throw new InvalidCharacterException( c ); 3206 } 3207 3208 if ( ( c >= 0x0BF3 ) && ( c <= 0x0C00 ) ) 3209 { 3210 throw new InvalidCharacterException( c ); 3211 } 3212 3213 // RFC 3454, Table C.3 3214 if ( ( c >= 0xE000 ) && ( c <= 0xF8FF ) ) 3215 { 3216 throw new InvalidCharacterException( c ); 3217 } 3218 3219 // RFC 3454, Table C.4 3220 if ( ( c >= 0xFDD0 ) && ( c <= 0xFDEF ) ) 3221 { 3222 throw new InvalidCharacterException( c ); 3223 } 3224 3225 if ( ( c == 0xFFFE ) || ( c == 0xFFFF ) ) 3226 { 3227 throw new InvalidCharacterException( c ); 3228 } 3229 3230 // RFC 3454, Table C.5 (Surrogates) 3231 if ( ( c >= 0xD800 ) && ( c <= 0xDFFF ) ) 3232 { 3233 throw new InvalidCharacterException( c ); 3234 } 3235 3236 // RFC 3454, Table C.8 3237 switch ( c ) 3238 { 3239 case 0x0340: // COMBINING GRAVE TONE MARK 3240 case 0x0341: // COMBINING ACUTE TONE MARK 3241 case 0x200E: // LEFT-TO-RIGHT MARK 3242 case 0x200F: // RIGHT-TO-LEFT MARK 3243 case 0x202A: // LEFT-TO-RIGHT EMBEDDING 3244 case 0x202B: // RIGHT-TO-LEFT EMBEDDING 3245 case 0x202C: // POP DIRECTIONAL FORMATTING 3246 case 0x202D: // LEFT-TO-RIGHT OVERRIDE 3247 case 0x202E: // RIGHT-TO-LEFT OVERRIDE 3248 case 0x206A: // INHIBIT SYMMETRIC SWAPPING 3249 case 0x206B: // ACTIVATE SYMMETRIC SWAPPING 3250 case 0x206C: // INHIBIT ARABIC FORM SHAPING 3251 case 0x206D: // ACTIVATE ARABIC FORM SHAPING 3252 case 0x206E: // NATIONAL DIGIT SHAPES 3253 case 0x206F: // NOMINAL DIGIT SHAPES 3254 throw new InvalidCharacterException( c ); 3255 default: 3256 break; 3257 } 3258 3259 if ( c == 0xFFFD ) 3260 { 3261 throw new 
InvalidCharacterException( c ); 3262 } 3263 } 3264 3265 3266 /** 3267 * Remove all insignificant spaces in a numeric string. For 3268 * instance, the following numeric string : 3269 * " 123 456 789 " 3270 * will be transformed to : 3271 * "123456789" 3272 * 3273 * @param source The numeric String 3274 * @return The modified numeric String 3275 */ 3276 public static String insignificantNumericStringHandling( char[] source ) 3277 { 3278 int pos = 0; 3279 3280 for ( char c : source ) 3281 { 3282 if ( c != 0x20 ) 3283 { 3284 source[pos++] = c; 3285 } 3286 } 3287 3288 return new String( source, 0, pos ); 3289 } 3290 3291 3292 /** 3293 * Remove all insignificant spaces in a TelephoneNumber string, 3294 * Hyphen and spaces. 3295 * 3296 * For instance, the following telephone number : 3297 * "+ (33) 1-123--456 789" 3298 * will be transformed to : 3299 * "+(33)1123456789" 3300 * 3301 * @param source The telephoneNumber String 3302 * @return The modified telephoneNumber String 3303 */ 3304 public static String insignificantTelephoneNumberStringHandling( char[] source ) 3305 { 3306 if ( source == null ) 3307 { 3308 return null; 3309 } 3310 3311 int pos = 0; 3312 3313 for ( char c : source ) 3314 { 3315 switch ( c ) 3316 { 3317 case 0x0020: // SPACE 3318 case 0x002D: // HYPHEN-MINUS 3319 case 0x058A: // ARMENIAN HYPHEN 3320 case 0x2010: // HYPHEN 3321 case 0x2011: // NON-BREAKING HYPHEN 3322 case 0x2212: // MINUS SIGN 3323 case 0xFE63: // SMALL HYPHEN-MINUS 3324 case 0xFF0D: // FULLWIDTH HYPHEN-MINUS 3325 break; 3326 3327 default: 3328 source[pos++] = c; 3329 break; 3330 } 3331 } 3332 3333 return new String( source, 0, pos ); 3334 } 3335 3336 3337 /** 3338 * Remove all insignificant spaces in a string. 
Any resulting String will start 3339 * with a space, ands with a space and every spaces found in the middle of the String 3340 * will be aggregated into two consecutive spaces : 3341 * 3342 * <ul> 3343 * <li>empty string --> <space><space> </li> 3344 * <li>A --> <space>A<space> </li> 3345 * <li><space>A --> <space>A<space> </li> 3346 * <li><space><space>A --> <space>A<space> </li> 3347 * <li>A<space> --> <space>A<space> </li> 3348 * <li>A<space><space><space>B --> <space>A<space><space>B<space> </li> 3349 * </ul> 3350 * This method use a finite state machine to parse the text. 3351 * 3352 * @param origin The String to modify 3353 * @return The modified String 3354 */ 3355 public static String insignificantSpacesStringValue( char[] origin ) 3356 { 3357 if ( origin == null ) 3358 { 3359 // Special case : a null strings is replaced by 2 spaces 3360 return " "; 3361 } 3362 3363 int pos = 0; 3364 3365 // Create a target char array which is longer than the original String, as we will 3366 // have 2 more spaces (one at the beginning, one at the end, and each space in the 3367 // middle will be doubled). 
3368 int newPos = 0; 3369 3370 char[] target = new char[origin.length * 2 + 1]; 3371 NormStateEnum normState = NormStateEnum.START; 3372 3373 while ( normState != NormStateEnum.END ) 3374 { 3375 switch ( normState ) 3376 { 3377 case START : 3378 if ( pos == origin.length ) 3379 { 3380 // We are done, it's an empty string 3381 return " "; 3382 } 3383 3384 char c = origin[pos]; 3385 3386 if ( c == ' ' ) 3387 { 3388 pos++; 3389 normState = NormStateEnum.INITIAL_SPACES; 3390 } 3391 else 3392 { 3393 // First add a space 3394 target[newPos++] = ' '; 3395 3396 // Then the char 3397 target[newPos++] = c; 3398 3399 pos++; 3400 normState = NormStateEnum.INITIAL_CHAR; 3401 } 3402 3403 break; 3404 3405 case INITIAL_CHAR : 3406 if ( pos == origin.length ) 3407 { 3408 // We are done, add a space 3409 target[newPos++] = ' '; 3410 normState = NormStateEnum.END; 3411 3412 break; 3413 } 3414 3415 c = origin[pos]; 3416 3417 if ( c == ' ' ) 3418 { 3419 // Switch to the SPACES state 3420 pos++; 3421 normState = NormStateEnum.SPACES; 3422 } 3423 else 3424 { 3425 // Add the char 3426 target[newPos++] = c; 3427 pos++; 3428 normState = NormStateEnum.CHARS; 3429 } 3430 3431 break; 3432 3433 case INITIAL_SPACES : 3434 if ( pos == origin.length ) 3435 { 3436 // We are done, this is an empty String 3437 return " "; 3438 } 3439 3440 c = origin[pos]; 3441 3442 if ( c == ' ' ) 3443 { 3444 pos++; 3445 // Keep going with the current state 3446 } 3447 else 3448 { 3449 // Add a space 3450 target[newPos++] = ' '; 3451 3452 // Add the char 3453 target[newPos++] = c; 3454 pos++; 3455 normState = NormStateEnum.INITIAL_CHAR; 3456 } 3457 3458 break; 3459 3460 case CHARS : 3461 if ( pos == origin.length ) 3462 { 3463 // We are done, add a Space 3464 target[newPos++] = ' '; 3465 normState = NormStateEnum.END; 3466 3467 break; 3468 } 3469 3470 c = origin[pos]; 3471 3472 if ( c == ' ' ) 3473 { 3474 pos++; 3475 normState = NormStateEnum.SPACES; 3476 } 3477 else 3478 { 3479 // Add the char 3480 target[newPos++] 
= c; 3481 pos++; 3482 // We keep going on the same state 3483 } 3484 3485 break; 3486 3487 case SPACES : 3488 if ( pos == origin.length ) 3489 { 3490 // We are done, add a Space 3491 target[newPos++] = ' '; 3492 normState = NormStateEnum.END; 3493 3494 break; 3495 } 3496 3497 c = origin[pos]; 3498 3499 if ( c == ' ' ) 3500 { 3501 pos++; 3502 // We keep going on the same state 3503 } 3504 else 3505 { 3506 // Add the two spaces 3507 target[newPos++] = ' '; 3508 target[newPos++] = ' '; 3509 3510 // Add the char 3511 target[newPos++] = c; 3512 pos++; 3513 3514 // Switch to SPACE_CHAR state 3515 normState = NormStateEnum.SPACE_CHAR; 3516 } 3517 3518 break; 3519 3520 case SPACE_CHAR : 3521 if ( pos == origin.length ) 3522 { 3523 // We are done, add a Space 3524 target[newPos++] = ' '; 3525 normState = NormStateEnum.END; 3526 3527 break; 3528 } 3529 3530 c = origin[pos]; 3531 3532 if ( c == ' ' ) 3533 { 3534 pos++; 3535 3536 // Switch to Spaces state 3537 normState = NormStateEnum.SPACES; 3538 } 3539 else 3540 { 3541 // Add the char 3542 target[newPos++] = c; 3543 pos++; 3544 3545 // Switch to CHARS state 3546 normState = NormStateEnum.CHARS; 3547 } 3548 3549 break; 3550 3551 default : 3552 // Do nothing 3553 break; 3554 } 3555 } 3556 3557 // create the resulting String 3558 return new String( target, 0, newPos ); 3559 } 3560 3561 3562 /** 3563 * Remove all insignificant spaces in a Initial assertion. A String will always start 3564 * with one space, every space in the middle will be doubled and if there are spaces 3565 * at the end, they will be replaced by one space : 3566 * <ul> 3567 * <li>A --> <space>A </li> 3568 * <li><space>A --> <space>A </li> 3569 * <li><space><space>A --> <space>A </li> 3570 * <li>A<space> --> <space>A<space> </li> 3571 * <li>A<space>B --> <space>A<space><space>B </li> 3572 * </ul> 3573 * 3574 * This method use a finite state machine to parse the text. 
3575 * 3576 * @param origin The String to modify 3577 * @return The modified String 3578 */ 3579 public static String insignificantSpacesStringInitial( char[] origin ) 3580 { 3581 if ( origin == null ) 3582 { 3583 // Special case : a null string is replaced by 1 space 3584 return " "; 3585 } 3586 3587 int pos = 0; 3588 3589 // Create a target char array which is longer than the original String, as we will 3590 // have 1 more spaces (one at the beginning, one at the end, and each space in the 3591 // middle will be doubled). 3592 char[] target = new char[origin.length * 2]; 3593 int newPos = 0; 3594 3595 NormStateEnum normState = NormStateEnum.START; 3596 3597 while ( normState != NormStateEnum.END ) 3598 { 3599 switch ( normState ) 3600 { 3601 case START : 3602 if ( pos == origin.length ) 3603 { 3604 // We are done, it's an empty string 3605 return " "; 3606 } 3607 3608 char c = origin[pos]; 3609 3610 if ( c == ' ' ) 3611 { 3612 pos++; 3613 normState = NormStateEnum.INITIAL_SPACES; 3614 } 3615 else 3616 { 3617 // First add a space 3618 target[newPos++] = ' '; 3619 3620 // Then the char 3621 target[newPos++] = c; 3622 3623 pos++; 3624 normState = NormStateEnum.INITIAL_CHAR; 3625 } 3626 3627 break; 3628 3629 case INITIAL_CHAR : 3630 if ( pos == origin.length ) 3631 { 3632 // We are done 3633 normState = NormStateEnum.END; 3634 3635 break; 3636 } 3637 3638 c = origin[pos]; 3639 3640 if ( c == ' ' ) 3641 { 3642 // Switch to the SPACES state 3643 pos++; 3644 normState = NormStateEnum.SPACES; 3645 } 3646 else 3647 { 3648 // Add the char 3649 target[newPos++] = c; 3650 pos++; 3651 normState = NormStateEnum.CHARS; 3652 } 3653 3654 break; 3655 3656 case INITIAL_SPACES : 3657 if ( pos == origin.length ) 3658 { 3659 // We are done, this is an empty String 3660 return " "; 3661 } 3662 3663 c = origin[pos]; 3664 3665 if ( c == ' ' ) 3666 { 3667 pos++; 3668 // Keep going with the current state 3669 } 3670 else 3671 { 3672 // Add a space 3673 target[newPos++] = ' '; 3674 3675 // 
Add the char 3676 target[newPos++] = c; 3677 pos++; 3678 normState = NormStateEnum.INITIAL_CHAR; 3679 } 3680 3681 break; 3682 3683 case CHARS : 3684 if ( pos == origin.length ) 3685 { 3686 // We are done 3687 normState = NormStateEnum.END; 3688 3689 break; 3690 } 3691 3692 c = origin[pos]; 3693 3694 if ( c == ' ' ) 3695 { 3696 pos++; 3697 normState = NormStateEnum.SPACES; 3698 } 3699 else 3700 { 3701 // Add the char 3702 target[newPos++] = c; 3703 pos++; 3704 // We keep going on the same state 3705 } 3706 3707 break; 3708 3709 case SPACES : 3710 if ( pos == origin.length ) 3711 { 3712 // We are done, add a Space 3713 target[newPos++] = ' '; 3714 normState = NormStateEnum.END; 3715 3716 break; 3717 } 3718 3719 c = origin[pos]; 3720 3721 if ( c == ' ' ) 3722 { 3723 pos++; 3724 // We keep going on the same state 3725 } 3726 else 3727 { 3728 // Add the two spaces 3729 target[newPos++] = ' '; 3730 target[newPos++] = ' '; 3731 3732 // Add the char 3733 target[newPos++] = c; 3734 pos++; 3735 3736 // Switch to SPACE_CHAR state 3737 normState = NormStateEnum.SPACE_CHAR; 3738 } 3739 3740 break; 3741 3742 case SPACE_CHAR : 3743 if ( pos == origin.length ) 3744 { 3745 // We are done 3746 normState = NormStateEnum.END; 3747 3748 break; 3749 } 3750 3751 c = origin[pos]; 3752 3753 if ( c == ' ' ) 3754 { 3755 pos++; 3756 3757 // Switch to Spaces state 3758 normState = NormStateEnum.SPACES; 3759 } 3760 else 3761 { 3762 // Add the char 3763 target[newPos++] = c; 3764 pos++; 3765 3766 // Switch to CHARS state 3767 normState = NormStateEnum.CHARS; 3768 } 3769 3770 break; 3771 3772 default : 3773 // Do nothing 3774 break; 3775 } 3776 } 3777 3778 // create the resulting String 3779 return new String( target, 0, newPos ); 3780 } 3781 3782 3783 /** 3784 * Remove all insignificant spaces in a Any assertion. 
A String starting with spaces 3785 * will start with exactly one space, every space in the middle will be doubled and if 3786 * there are spaces at the end, they will be replaced by one space : 3787 * <ul> 3788 * <li>A --> A </li> 3789 * <li><space>A --> <space>A </li> 3790 * <li><space><space>A --> <space>A </li> 3791 * <li>A<space> --> A<space> </li> 3792 * <li>A<space><space> --> A<space> </li> 3793 * <li>A<space>B --> A<space><space>B </li> 3794 * </ul> 3795 * 3796 * This method use a finite state machine to parse 3797 * the text. 3798 * 3799 * @param origin The String to modify 3800 * @return The modified String 3801 */ 3802 public static String insignificantSpacesStringAny( char[] origin ) 3803 { 3804 if ( origin == null ) 3805 { 3806 // Special case : a null strings is replaced by 1 space 3807 return " "; 3808 } 3809 3810 int pos = 0; 3811 3812 // Create a target char array which is longer than the original String, as we may have to add a space. 3813 char[] target = new char[origin.length * 2 + 1]; 3814 int newPos = 0; 3815 3816 NormStateEnum normState = NormStateEnum.START; 3817 3818 while ( normState != NormStateEnum.END ) 3819 { 3820 switch ( normState ) 3821 { 3822 case START : 3823 if ( pos == origin.length ) 3824 { 3825 // We are done, it's an empty string -> one space 3826 return " "; 3827 } 3828 3829 char c = origin[pos]; 3830 3831 if ( c == ' ' ) 3832 { 3833 pos++; 3834 normState = NormStateEnum.INITIAL_SPACES; 3835 } 3836 else 3837 { 3838 // Add the char 3839 target[newPos++] = c; 3840 3841 pos++; 3842 normState = NormStateEnum.INITIAL_CHAR; 3843 } 3844 3845 break; 3846 3847 case INITIAL_CHAR : 3848 if ( pos == origin.length ) 3849 { 3850 // We are done 3851 normState = NormStateEnum.END; 3852 3853 break; 3854 } 3855 3856 c = origin[pos]; 3857 3858 if ( c == ' ' ) 3859 { 3860 // Switch to the SPACES state, add a space in the target 3861 target[newPos++] = ' '; 3862 pos++; 3863 normState = NormStateEnum.SPACES; 3864 } 3865 else 3866 { 3867 // Add 
the char 3868 target[newPos++] = c; 3869 pos++; 3870 normState = NormStateEnum.CHARS; 3871 } 3872 3873 break; 3874 3875 case INITIAL_SPACES : 3876 if ( pos == origin.length ) 3877 { 3878 // We are done, this is an empty String -> one space 3879 return " "; 3880 } 3881 3882 c = origin[pos]; 3883 3884 if ( c == ' ' ) 3885 { 3886 pos++; 3887 // Keep going with the current state 3888 } 3889 else 3890 { 3891 // Add a space 3892 target[newPos++] = ' '; 3893 3894 // Add the char 3895 target[newPos++] = c; 3896 pos++; 3897 normState = NormStateEnum.INITIAL_CHAR; 3898 } 3899 3900 break; 3901 3902 case CHARS : 3903 if ( pos == origin.length ) 3904 { 3905 // We are done 3906 normState = NormStateEnum.END; 3907 3908 break; 3909 } 3910 3911 c = origin[pos]; 3912 3913 if ( c == ' ' ) 3914 { 3915 // Add the space 3916 target[newPos++] = ' '; 3917 3918 pos++; 3919 normState = NormStateEnum.SPACES; 3920 } 3921 else 3922 { 3923 // Add the char 3924 target[newPos++] = c; 3925 pos++; 3926 // We keep going on the same state 3927 } 3928 3929 break; 3930 3931 case SPACES : 3932 if ( pos == origin.length ) 3933 { 3934 // We are done 3935 normState = NormStateEnum.END; 3936 3937 break; 3938 } 3939 3940 c = origin[pos]; 3941 3942 if ( c == ' ' ) 3943 { 3944 pos++; 3945 // We keep going on the same state 3946 } 3947 else 3948 { 3949 // Add the second space 3950 target[newPos++] = ' '; 3951 3952 // Add the char 3953 target[newPos++] = c; 3954 pos++; 3955 3956 // Switch to SPACE_CHAR state 3957 normState = NormStateEnum.SPACE_CHAR; 3958 } 3959 3960 break; 3961 3962 case SPACE_CHAR : 3963 if ( pos == origin.length ) 3964 { 3965 // We are done 3966 normState = NormStateEnum.END; 3967 3968 break; 3969 } 3970 3971 c = origin[pos]; 3972 3973 if ( c == ' ' ) 3974 { 3975 pos++; 3976 3977 // Add the space 3978 target[newPos++] = ' '; 3979 3980 // Switch to Spaces state 3981 normState = NormStateEnum.SPACES; 3982 } 3983 else 3984 { 3985 // Add the char 3986 target[newPos++] = c; 3987 pos++; 3988 3989 
// Switch to CHARS state 3990 normState = NormStateEnum.CHARS; 3991 } 3992 3993 break; 3994 3995 default : 3996 // Do nothing 3997 break; 3998 } 3999 } 4000 4001 // create the resulting String 4002 return new String( target, 0, newPos ); 4003 } 4004 4005 4006 /** 4007 * Remove all insignificant spaces in a string. 4008 * 4009 * This method use a finite state machine to parse 4010 * the text. 4011 * 4012 * @param origin The String to modify 4013 * @return The modified StringBuilder 4014 */ 4015 public static String insignificantSpacesStringFinal( char[] origin ) 4016 { 4017 if ( origin == null ) 4018 { 4019 // Special case : a null strings is replaced by 1 spaces 4020 return " "; 4021 } 4022 4023 int pos = 0; 4024 4025 // Create a target char array which is longer than the original String, as we will 4026 // have 2 more spaces (one at the beginning, one at the end, and each space in the 4027 // middle will be doubled). 4028 char[] target = new char[origin.length * 2 + 1]; 4029 int newPos = 0; 4030 4031 NormStateEnum normState = NormStateEnum.START; 4032 4033 while ( normState != NormStateEnum.END ) 4034 { 4035 switch ( normState ) 4036 { 4037 case START : 4038 if ( pos == origin.length ) 4039 { 4040 // We are done, it's an empty string 4041 return " "; 4042 } 4043 4044 char c = origin[pos]; 4045 4046 if ( c == ' ' ) 4047 { 4048 pos++; 4049 normState = NormStateEnum.INITIAL_SPACES; 4050 } 4051 else 4052 { 4053 // Add the char 4054 target[newPos++] = c; 4055 4056 pos++; 4057 normState = NormStateEnum.INITIAL_CHAR; 4058 } 4059 4060 break; 4061 4062 case INITIAL_CHAR : 4063 if ( pos == origin.length ) 4064 { 4065 // We are done, add a space 4066 target[newPos++] = ' '; 4067 normState = NormStateEnum.END; 4068 4069 break; 4070 } 4071 4072 c = origin[pos]; 4073 4074 if ( c == ' ' ) 4075 { 4076 // Switch to the SPACES state 4077 pos++; 4078 normState = NormStateEnum.SPACES; 4079 } 4080 else 4081 { 4082 // Add the char 4083 target[newPos++] = c; 4084 pos++; 4085 normState = 
NormStateEnum.CHARS; 4086 } 4087 4088 break; 4089 4090 case INITIAL_SPACES : 4091 if ( pos == origin.length ) 4092 { 4093 // We are done, this is an empty String 4094 return " "; 4095 } 4096 4097 c = origin[pos]; 4098 4099 if ( c == ' ' ) 4100 { 4101 pos++; 4102 // Keep going with the current state 4103 } 4104 else 4105 { 4106 // Add a space 4107 target[newPos++] = ' '; 4108 4109 // Add the char 4110 target[newPos++] = c; 4111 pos++; 4112 normState = NormStateEnum.INITIAL_CHAR; 4113 } 4114 4115 break; 4116 4117 case CHARS : 4118 if ( pos == origin.length ) 4119 { 4120 // We are done, add a Space 4121 target[newPos++] = ' '; 4122 normState = NormStateEnum.END; 4123 4124 break; 4125 } 4126 4127 c = origin[pos]; 4128 4129 if ( c == ' ' ) 4130 { 4131 pos++; 4132 normState = NormStateEnum.SPACES; 4133 } 4134 else 4135 { 4136 // Add the char 4137 target[newPos++] = c; 4138 pos++; 4139 // We keep going on the same state 4140 } 4141 4142 break; 4143 4144 case SPACES : 4145 if ( pos == origin.length ) 4146 { 4147 // We are done, add a Space 4148 target[newPos++] = ' '; 4149 normState = NormStateEnum.END; 4150 4151 break; 4152 } 4153 4154 c = origin[pos]; 4155 4156 if ( c == ' ' ) 4157 { 4158 pos++; 4159 // We keep going on the same state 4160 } 4161 else 4162 { 4163 // Add the two spaces 4164 target[newPos++] = ' '; 4165 target[newPos++] = ' '; 4166 4167 // Add the char 4168 target[newPos++] = c; 4169 pos++; 4170 4171 // Switch to SPACE_CHAR state 4172 normState = NormStateEnum.SPACE_CHAR; 4173 } 4174 4175 break; 4176 4177 case SPACE_CHAR : 4178 if ( pos == origin.length ) 4179 { 4180 // We are done, add a Space 4181 target[newPos++] = ' '; 4182 normState = NormStateEnum.END; 4183 4184 break; 4185 } 4186 4187 c = origin[pos]; 4188 4189 if ( c == ' ' ) 4190 { 4191 pos++; 4192 4193 // Switch to Spaces state 4194 normState = NormStateEnum.SPACES; 4195 } 4196 else 4197 { 4198 // Add the char 4199 target[newPos++] = c; 4200 pos++; 4201 4202 // Switch to CHARS state 4203 
normState = NormStateEnum.CHARS; 4204 } 4205 4206 break; 4207 4208 default : 4209 // Do nothing 4210 break; 4211 } 4212 } 4213 4214 // create the resulting String 4215 return new String( target, 0, newPos ); 4216 } 4217 4218 4219 /** 4220 * Map for Ascii String, case insensitive 4221 * 4222 * @param unicode The string to map 4223 * @return The lower cased string 4224 */ 4225 private static String mapIgnoreCaseAscii( String unicode ) 4226 { 4227 char[] source = unicode.toCharArray(); 4228 int pos = 0; 4229 4230 for ( char c : source ) 4231 { 4232 switch ( c ) 4233 { 4234 case 0x0000: 4235 case 0x0001: 4236 case 0x0002: 4237 case 0x0003: 4238 case 0x0004: 4239 case 0x0005: 4240 case 0x0006: 4241 case 0x0007: 4242 case 0x0008: 4243 // All other control code (e.g., Cc) points or code points with a 4244 // control function (e.g., Cf) are mapped to nothing. The following is 4245 // a complete list of these code points: U+0000-0008... 4246 break; 4247 4248 case 0x0009: 4249 case 0x000A: 4250 case 0x000B: 4251 case 0x000C: 4252 case 0x000D: 4253 // CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE 4254 // TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR) 4255 // (U+000D), ... are mapped to SPACE (U+0020). 4256 source[pos++] = 0x0020; 4257 break; 4258 4259 case 0x000E: 4260 case 0x000F: 4261 case 0x0010: 4262 case 0x0011: 4263 case 0x0012: 4264 case 0x0013: 4265 case 0x0014: 4266 case 0x0015: 4267 case 0x0016: 4268 case 0x0017: 4269 case 0x0018: 4270 case 0x0019: 4271 case 0x001A: 4272 case 0x001B: 4273 case 0x001C: 4274 case 0x001D: 4275 case 0x001E: 4276 case 0x001F: 4277 // All other control code (e.g., Cc) points or code points with a 4278 // control function (e.g., Cf) are mapped to nothing. The following is 4279 // a complete list of these code points: ... U+000E-001F... 
4280 break; 4281 4282 case 0x0020: 4283 case 0x0021: 4284 case 0x0022: 4285 case 0x0023: 4286 case 0x0024: 4287 case 0x0025: 4288 case 0x0026: 4289 case 0x0027: 4290 case 0x0028: 4291 case 0x0029: 4292 case 0x002A: 4293 case 0x002B: 4294 case 0x002C: 4295 case 0x002D: 4296 case 0x002E: 4297 case 0x002F: 4298 case 0x0030: 4299 case 0x0031: 4300 case 0x0032: 4301 case 0x0033: 4302 case 0x0034: 4303 case 0x0035: 4304 case 0x0036: 4305 case 0x0037: 4306 case 0x0038: 4307 case 0x0039: 4308 case 0x003A: 4309 case 0x003B: 4310 case 0x003C: 4311 case 0x003D: 4312 case 0x003E: 4313 case 0x003F: 4314 case 0x0040: 4315 source[pos++] = c; 4316 break; 4317 4318 case 0x0041: 4319 case 0x0042: 4320 case 0x0043: 4321 case 0x0044: 4322 case 0x0045: 4323 case 0x0046: 4324 case 0x0047: 4325 case 0x0048: 4326 case 0x0049: 4327 case 0x004A: 4328 case 0x004B: 4329 case 0x004C: 4330 case 0x004D: 4331 case 0x004E: 4332 case 0x004F: 4333 case 0x0050: 4334 case 0x0051: 4335 case 0x0052: 4336 case 0x0053: 4337 case 0x0054: 4338 case 0x0055: 4339 case 0x0056: 4340 case 0x0057: 4341 case 0x0058: 4342 case 0x0059: 4343 case 0x005A: 4344 // For case ignore, numeric, and stored prefix string matching rules, 4345 // characters are case folded per B.2 of [RFC3454] : U+0041-005A 4346 source[pos++] = ( char ) ( c + 0x0020 ); 4347 break; 4348 4349 case 0x005B: 4350 case 0x005C: 4351 case 0x005D: 4352 case 0x005E: 4353 case 0x005F: 4354 case 0x0060: 4355 case 0x0061: 4356 case 0x0062: 4357 case 0x0063: 4358 case 0x0064: 4359 case 0x0065: 4360 case 0x0066: 4361 case 0x0067: 4362 case 0x0068: 4363 case 0x0069: 4364 case 0x006A: 4365 case 0x006B: 4366 case 0x006C: 4367 case 0x006D: 4368 case 0x006E: 4369 case 0x006F: 4370 case 0x0070: 4371 case 0x0071: 4372 case 0x0072: 4373 case 0x0073: 4374 case 0x0074: 4375 case 0x0075: 4376 case 0x0076: 4377 case 0x0077: 4378 case 0x0078: 4379 case 0x0079: 4380 case 0x007A: 4381 case 0x007B: 4382 case 0x007C: 4383 case 0x007D: 4384 case 0x007E: 4385 source[pos++] = c; 
4386 break; 4387 4388 case 0x007F: 4389 // All other control code (e.g., Cc) points or code points with a 4390 // control function (e.g., Cf) are mapped to nothing. The following is 4391 // a complete list of these code points: ... U+007F-0084... 4392 break; 4393 4394 default : 4395 throw AIOOBE; 4396 } 4397 } 4398 4399 return new String( source, 0, pos ); 4400 } 4401 4402 4403 /** 4404 * Map for Ascii String, case sensitive 4405 * 4406 * @param unicode The string to re-map 4407 * @return The mapped string 4408 */ 4409 private static String mapCaseSensitiveAscii( String unicode ) 4410 { 4411 char[] source = unicode.toCharArray(); 4412 int pos = 0; 4413 4414 for ( char c : source ) 4415 { 4416 switch ( c ) 4417 { 4418 case 0x0000: 4419 case 0x0001: 4420 case 0x0002: 4421 case 0x0003: 4422 case 0x0004: 4423 case 0x0005: 4424 case 0x0006: 4425 case 0x0007: 4426 case 0x0008: 4427 // All other control code (e.g., Cc) points or code points with a 4428 // control function (e.g., Cf) are mapped to nothing. The following is 4429 // a complete list of these code points: U+0000-0008... 4430 break; 4431 4432 case 0x0009: 4433 case 0x000A: 4434 case 0x000B: 4435 case 0x000C: 4436 case 0x000D: 4437 // CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE 4438 // TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR) 4439 // (U+000D), ... are mapped to SPACE (U+0020). 4440 source[pos++] = 0x0020; 4441 break; 4442 4443 case 0x000E: 4444 case 0x000F: 4445 case 0x0010: 4446 case 0x0011: 4447 case 0x0012: 4448 case 0x0013: 4449 case 0x0014: 4450 case 0x0015: 4451 case 0x0016: 4452 case 0x0017: 4453 case 0x0018: 4454 case 0x0019: 4455 case 0x001A: 4456 case 0x001B: 4457 case 0x001C: 4458 case 0x001D: 4459 case 0x001E: 4460 case 0x001F: 4461 // All other control code (e.g., Cc) points or code points with a 4462 // control function (e.g., Cf) are mapped to nothing. The following is 4463 // a complete list of these code points: ... U+000E-001F... 
4464 break; 4465 4466 case 0x0020: 4467 case 0x0021: 4468 case 0x0022: 4469 case 0x0023: 4470 case 0x0024: 4471 case 0x0025: 4472 case 0x0026: 4473 case 0x0027: 4474 case 0x0028: 4475 case 0x0029: 4476 case 0x002A: 4477 case 0x002B: 4478 case 0x002C: 4479 case 0x002D: 4480 case 0x002E: 4481 case 0x002F: 4482 case 0x0030: 4483 case 0x0031: 4484 case 0x0032: 4485 case 0x0033: 4486 case 0x0034: 4487 case 0x0035: 4488 case 0x0036: 4489 case 0x0037: 4490 case 0x0038: 4491 case 0x0039: 4492 case 0x003A: 4493 case 0x003B: 4494 case 0x003C: 4495 case 0x003D: 4496 case 0x003E: 4497 case 0x003F: 4498 case 0x0040: 4499 source[pos++] = c; 4500 break; 4501 4502 case 0x0041: 4503 case 0x0042: 4504 case 0x0043: 4505 case 0x0044: 4506 case 0x0045: 4507 case 0x0046: 4508 case 0x0047: 4509 case 0x0048: 4510 case 0x0049: 4511 case 0x004A: 4512 case 0x004B: 4513 case 0x004C: 4514 case 0x004D: 4515 case 0x004E: 4516 case 0x004F: 4517 case 0x0050: 4518 case 0x0051: 4519 case 0x0052: 4520 case 0x0053: 4521 case 0x0054: 4522 case 0x0055: 4523 case 0x0056: 4524 case 0x0057: 4525 case 0x0058: 4526 case 0x0059: 4527 case 0x005A: 4528 case 0x005B: 4529 case 0x005C: 4530 case 0x005D: 4531 case 0x005E: 4532 case 0x005F: 4533 case 0x0060: 4534 case 0x0061: 4535 case 0x0062: 4536 case 0x0063: 4537 case 0x0064: 4538 case 0x0065: 4539 case 0x0066: 4540 case 0x0067: 4541 case 0x0068: 4542 case 0x0069: 4543 case 0x006A: 4544 case 0x006B: 4545 case 0x006C: 4546 case 0x006D: 4547 case 0x006E: 4548 case 0x006F: 4549 case 0x0070: 4550 case 0x0071: 4551 case 0x0072: 4552 case 0x0073: 4553 case 0x0074: 4554 case 0x0075: 4555 case 0x0076: 4556 case 0x0077: 4557 case 0x0078: 4558 case 0x0079: 4559 case 0x007A: 4560 case 0x007B: 4561 case 0x007C: 4562 case 0x007D: 4563 case 0x007E: 4564 source[pos++] = c; 4565 break; 4566 4567 case 0x007F: 4568 // All other control code (e.g., Cc) points or code points with a 4569 // control function (e.g., Cf) are mapped to nothing. 
The following is 4570 // a complete list of these code points: ... U+007F-0084... 4571 break; 4572 4573 default : 4574 throw AIOOBE; 4575 } 4576 } 4577 4578 return new String( source, 0, pos ); 4579 } 4580 4581 4582 /** 4583 * Apply the RFC 4518 MAP transformation, case insensitive 4584 * 4585 * @param unicode The original String 4586 * @return The mapped String 4587 */ 4588 public static String mapIgnoreCase( String unicode ) 4589 { 4590 try 4591 { 4592 return mapIgnoreCaseAscii( unicode ); 4593 } 4594 catch ( ArrayIndexOutOfBoundsException aioobe ) 4595 { 4596 // There 4597 } 4598 4599 char[] source = unicode.toCharArray(); 4600 4601 // Create a target char array which is 3 times bigger than the original size. 4602 // We have to do that because the map phase may transform a char to 4603 // three chars. 4604 // TODO : we have to find a way to prevent this waste of space. 4605 char[] target = new char[unicode.length() * 3 + 2]; 4606 4607 int limit = 0; 4608 4609 for ( char c : source ) 4610 { 4611 switch ( c ) 4612 { 4613 case 0x0000: 4614 case 0x0001: 4615 case 0x0002: 4616 case 0x0003: 4617 case 0x0004: 4618 case 0x0005: 4619 case 0x0006: 4620 case 0x0007: 4621 case 0x0008: 4622 // All other control code (e.g., Cc) points or code points with a 4623 // control function (e.g., Cf) are mapped to nothing. The following is 4624 // a complete list of these code points: U+0000-0008... 4625 break; 4626 4627 case 0x0009: 4628 case 0x000A: 4629 case 0x000B: 4630 case 0x000C: 4631 case 0x000D: 4632 // CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE 4633 // TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR) 4634 // (U+000D), ... are mapped to SPACE (U+0020). 
4635 target[limit++] = 0x0020; 4636 break; 4637 4638 case 0x000E: 4639 case 0x000F: 4640 case 0x0010: 4641 case 0x0011: 4642 case 0x0012: 4643 case 0x0013: 4644 case 0x0014: 4645 case 0x0015: 4646 case 0x0016: 4647 case 0x0017: 4648 case 0x0018: 4649 case 0x0019: 4650 case 0x001A: 4651 case 0x001B: 4652 case 0x001C: 4653 case 0x001D: 4654 case 0x001E: 4655 case 0x001F: 4656 // All other control code (e.g., Cc) points or code points with a 4657 // control function (e.g., Cf) are mapped to nothing. The following is 4658 // a complete list of these code points: ... U+000E-001F... 4659 break; 4660 4661 case 0x0041: 4662 case 0x0042: 4663 case 0x0043: 4664 case 0x0044: 4665 case 0x0045: 4666 case 0x0046: 4667 case 0x0047: 4668 case 0x0048: 4669 case 0x0049: 4670 case 0x004A: 4671 case 0x004B: 4672 case 0x004C: 4673 case 0x004D: 4674 case 0x004E: 4675 case 0x004F: 4676 case 0x0050: 4677 case 0x0051: 4678 case 0x0052: 4679 case 0x0053: 4680 case 0x0054: 4681 case 0x0055: 4682 case 0x0056: 4683 case 0x0057: 4684 case 0x0058: 4685 case 0x0059: 4686 case 0x005A: 4687 // For case ignore, numeric, and stored prefix string matching rules, 4688 // characters are case folded per B.2 of [RFC3454] : U+0041-005A 4689 target[limit++] = ( char ) ( c + 0x0020 ); 4690 break; 4691 4692 case 0x0061: 4693 case 0x0062: 4694 case 0x0063: 4695 case 0x0064: 4696 case 0x0065: 4697 case 0x0066: 4698 case 0x0067: 4699 case 0x0068: 4700 case 0x0069: 4701 case 0x006A: 4702 case 0x006B: 4703 case 0x006C: 4704 case 0x006D: 4705 case 0x006E: 4706 case 0x006F: 4707 case 0x0070: 4708 case 0x0071: 4709 case 0x0072: 4710 case 0x0073: 4711 case 0x0074: 4712 case 0x0075: 4713 case 0x0076: 4714 case 0x0077: 4715 case 0x0078: 4716 case 0x0079: 4717 case 0x007A: 4718 target[limit++] = c; 4719 break; 4720 4721 case 0x007F: 4722 case 0x0080: 4723 case 0x0081: 4724 case 0x0082: 4725 case 0x0083: 4726 case 0x0084: 4727 // All other control code (e.g., Cc) points or code points with a 4728 // control function 
(e.g., Cf) are mapped to nothing. The following is 4729 // a complete list of these code points: ... U+007F-0084... 4730 break; 4731 4732 case 0x0085: 4733 // ... and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020). 4734 target[limit++] = 0x0020; 4735 break; 4736 4737 case 0x0086: 4738 case 0x0087: 4739 case 0x0088: 4740 case 0x0089: 4741 case 0x008A: 4742 case 0x008B: 4743 case 0x008C: 4744 case 0x008D: 4745 case 0x008E: 4746 case 0x008F: 4747 case 0x0090: 4748 case 0x0091: 4749 case 0x0092: 4750 case 0x0093: 4751 case 0x0094: 4752 case 0x0095: 4753 case 0x0096: 4754 case 0x0097: 4755 case 0x0098: 4756 case 0x0099: 4757 case 0x009A: 4758 case 0x009B: 4759 case 0x009C: 4760 case 0x009D: 4761 case 0x009E: 4762 case 0x009F: 4763 // All other control code (e.g., Cc) points or code points with a 4764 // control function (e.g., Cf) are mapped to nothing. The following is 4765 // a complete list of these code points: ... U+0086-009F... 4766 break; 4767 4768 case 0x00A0: 4769 // All other code points with Separator (space, line, or paragraph) property 4770 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 4771 // list of these code points: ... 00A0 ... 4772 target[limit++] = 0x0020; 4773 break; 4774 4775 case 0x00AD: 4776 // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 4777 // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and 4778 // VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also 4779 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 4780 // mapped to nothing. 
4781 break; 4782 4783 case 0x00B5: 4784 // For case ignore, numeric, and stored prefix string matching rules, 4785 // characters are case folded per B.2 of [RFC3454] : U+00B5 4786 target[limit++] = 0x03BC; 4787 break; 4788 4789 case 0x00C0: 4790 case 0x00C1: 4791 case 0x00C2: 4792 case 0x00C3: 4793 case 0x00C4: 4794 case 0x00C5: 4795 case 0x00C6: 4796 case 0x00C7: 4797 case 0x00C8: 4798 case 0x00C9: 4799 case 0x00CA: 4800 case 0x00CB: 4801 case 0x00CC: 4802 case 0x00CD: 4803 case 0x00CE: 4804 case 0x00CF: 4805 case 0x00D0: 4806 case 0x00D1: 4807 case 0x00D2: 4808 case 0x00D3: 4809 case 0x00D4: 4810 case 0x00D5: 4811 // no 0x00D7 4812 case 0x00D6: 4813 case 0x00D8: 4814 case 0x00D9: 4815 case 0x00DA: 4816 case 0x00DB: 4817 case 0x00DC: 4818 case 0x00DD: 4819 case 0x00DE: 4820 // For case ignore, numeric, and stored prefix string matching rules, 4821 // characters are case folded per B.2 of [RFC3454] : U+00C0-00D6, 4822 // U+00D8-00DE 4823 target[limit++] = ( char ) ( c + 0x0020 ); 4824 break; 4825 4826 case 0x00DF: 4827 // For case ignore, numeric, and stored prefix string matching rules, 4828 // characters are case folded per B.2 of [RFC3454] : U+00DF 4829 target[limit++] = 0x0073; 4830 target[limit++] = 0x0073; 4831 break; 4832 4833 case 0x0100: 4834 case 0x0102: 4835 case 0x0104: 4836 case 0x0106: 4837 case 0x0108: 4838 case 0x010A: 4839 case 0x010C: 4840 case 0x010E: 4841 case 0x0110: 4842 case 0x0112: 4843 case 0x0114: 4844 case 0x0116: 4845 case 0x0118: 4846 case 0x011A: 4847 case 0x011C: 4848 case 0x011E: 4849 case 0x0120: 4850 case 0x0122: 4851 case 0x0124: 4852 case 0x0126: 4853 case 0x0128: 4854 case 0x012A: 4855 case 0x012C: 4856 case 0x012E: 4857 // For case ignore, numeric, and stored prefix string matching rules, 4858 // characters are case folded per B.2 of [RFC3454] : U+0100-012E 4859 target[limit++] = ( char ) ( c + 0x0001 ); 4860 break; 4861 4862 case 0x0130: 4863 // For case ignore, numeric, and stored prefix string matching rules, 4864 // 
characters are case folded per B.2 of [RFC3454] : U+0130 4865 target[limit++] = 0x0069; 4866 target[limit++] = 0x0307; 4867 break; 4868 4869 case 0x0132: 4870 case 0x0134: 4871 case 0x0136: 4872 case 0x0139: 4873 case 0x013B: 4874 case 0x013D: 4875 case 0x013F: 4876 case 0x0141: 4877 case 0x0143: 4878 case 0x0145: 4879 case 0x0147: 4880 // For case ignore, numeric, and stored prefix string matching rules, 4881 // characters are case folded per B.2 of [RFC3454] : U+0132-0147 4882 target[limit++] = ( char ) ( c + 0x0001 ); 4883 break; 4884 4885 case 0x0149: 4886 // For case ignore, numeric, and stored prefix string matching rules, 4887 // characters are case folded per B.2 of [RFC3454] : U+0149 4888 target[limit++] = 0x02BC; 4889 target[limit++] = 0x006E; 4890 break; 4891 4892 case 0x014A: 4893 case 0x014C: 4894 case 0x014E: 4895 case 0x0150: 4896 case 0x0152: 4897 case 0x0154: 4898 case 0x0156: 4899 case 0x0158: 4900 case 0x015A: 4901 case 0x015C: 4902 case 0x015E: 4903 case 0x0160: 4904 case 0x0162: 4905 case 0x0164: 4906 case 0x0166: 4907 case 0x0168: 4908 case 0x016A: 4909 case 0x016C: 4910 case 0x016E: 4911 case 0x0170: 4912 case 0x0172: 4913 case 0x0174: 4914 case 0x0176: 4915 // For case ignore, numeric, and stored prefix string matching rules, 4916 // characters are case folded per B.2 of [RFC3454] : U+0141-0176 4917 target[limit++] = ( char ) ( c + 0x0001 ); 4918 break; 4919 4920 case 0x0178: 4921 // For case ignore, numeric, and stored prefix string matching rules, 4922 // characters are case folded per B.2 of [RFC3454] : U+0178 4923 target[limit++] = 0x00FF; 4924 break; 4925 4926 case 0x0179: 4927 case 0x017B: 4928 case 0x017D: 4929 // For case ignore, numeric, and stored prefix string matching rules, 4930 // characters are case folded per B.2 of [RFC3454] : U+0179-017D 4931 target[limit++] = ( char ) ( c + 0x0001 ); 4932 break; 4933 4934 case 0x017F: 4935 // For case ignore, numeric, and stored prefix string matching rules, 4936 // characters are case 
folded per B.2 of [RFC3454] : U+017F 4937 target[limit++] = 0x0073; 4938 break; 4939 4940 case 0x0181: 4941 // For case ignore, numeric, and stored prefix string matching rules, 4942 // characters are case folded per B.2 of [RFC3454] : U+0181 4943 target[limit++] = 0x0253; 4944 break; 4945 4946 case 0x0182: 4947 case 0x0184: 4948 // For case ignore, numeric, and stored prefix string matching rules, 4949 // characters are case folded per B.2 of [RFC3454] : U+0182, U+0x0184 4950 target[limit++] = ( char ) ( c + 0x0001 ); 4951 break; 4952 4953 case 0x0186: 4954 // For case ignore, numeric, and stored prefix string matching rules, 4955 // characters are case folded per B.2 of [RFC3454] : U+0186 4956 target[limit++] = 0x0254; 4957 break; 4958 4959 case 0x0187: 4960 // For case ignore, numeric, and stored prefix string matching rules, 4961 // characters are case folded per B.2 of [RFC3454] : U+0188 4962 target[limit++] = 0x0188; 4963 break; 4964 4965 case 0x0189: 4966 case 0x018A: 4967 // For case ignore, numeric, and stored prefix string matching rules, 4968 // characters are case folded per B.2 of [RFC3454] : U+0189, U+018A 4969 target[limit++] = ( char ) ( c + 0x00CD ); 4970 break; 4971 4972 case 0x018B: 4973 // For case ignore, numeric, and stored prefix string matching rules, 4974 // characters are case folded per B.2 of [RFC3454] : U+018B 4975 target[limit++] = 0x018C; 4976 break; 4977 4978 case 0x018E: 4979 // For case ignore, numeric, and stored prefix string matching rules, 4980 // characters are case folded per B.2 of [RFC3454] : U+018E 4981 target[limit++] = 0x01DD; 4982 break; 4983 4984 case 0x018F: 4985 // For case ignore, numeric, and stored prefix string matching rules, 4986 // characters are case folded per B.2 of [RFC3454] : U+018F 4987 target[limit++] = 0x0259; 4988 break; 4989 4990 case 0x0190: 4991 // For case ignore, numeric, and stored prefix string matching rules, 4992 // characters are case folded per B.2 of [RFC3454] : U+0190 4993 target[limit++] 
= 0x025B; 4994 break; 4995 4996 case 0x0191: 4997 // For case ignore, numeric, and stored prefix string matching rules, 4998 // characters are case folded per B.2 of [RFC3454] : U+0191 4999 target[limit++] = 0x0192; 5000 break; 5001 5002 case 0x0193: 5003 // For case ignore, numeric, and stored prefix string matching rules, 5004 // characters are case folded per B.2 of [RFC3454] : U+0193 5005 target[limit++] = 0x0260; 5006 break; 5007 5008 case 0x0194: 5009 // For case ignore, numeric, and stored prefix string matching rules, 5010 // characters are case folded per B.2 of [RFC3454] : U+0194 5011 target[limit++] = 0x0263; 5012 break; 5013 5014 case 0x0196: 5015 // For case ignore, numeric, and stored prefix string matching rules, 5016 // characters are case folded per B.2 of [RFC3454] : U+0196 5017 target[limit++] = 0x0269; 5018 break; 5019 5020 case 0x0197: 5021 // For case ignore, numeric, and stored prefix string matching rules, 5022 // characters are case folded per B.2 of [RFC3454] : U+0197 5023 target[limit++] = 0x0268; 5024 break; 5025 5026 case 0x0198: 5027 // For case ignore, numeric, and stored prefix string matching rules, 5028 // characters are case folded per B.2 of [RFC3454] : U+0198 5029 target[limit++] = 0x0199; 5030 break; 5031 5032 case 0x019C: 5033 // For case ignore, numeric, and stored prefix string matching rules, 5034 // characters are case folded per B.2 of [RFC3454] : U+019C 5035 target[limit++] = 0x026F; 5036 break; 5037 5038 case 0x019D: 5039 // For case ignore, numeric, and stored prefix string matching rules, 5040 // characters are case folded per B.2 of [RFC3454] : U+019D 5041 target[limit++] = 0x0272; 5042 break; 5043 5044 case 0x019F: 5045 // For case ignore, numeric, and stored prefix string matching rules, 5046 // characters are case folded per B.2 of [RFC3454] : U+019F 5047 target[limit++] = 0x0275; 5048 break; 5049 5050 case 0x01A0: 5051 case 0x01A2: 5052 case 0x01A4: 5053 // For case ignore, numeric, and stored prefix string 
matching rules, 5054 // characters are case folded per B.2 of [RFC3454] : U+01A0-U+01A4 5055 target[limit++] = ( char ) ( c + 0x0001 ); 5056 break; 5057 5058 case 0x01A6: 5059 // For case ignore, numeric, and stored prefix string matching rules, 5060 // characters are case folded per B.2 of [RFC3454] : U+01A6 5061 target[limit++] = 0x0280; 5062 break; 5063 5064 case 0x01A7: 5065 // For case ignore, numeric, and stored prefix string matching rules, 5066 // characters are case folded per B.2 of [RFC3454] : U+01A7 5067 target[limit++] = 0x01A8; 5068 break; 5069 5070 case 0x01A9: 5071 // For case ignore, numeric, and stored prefix string matching rules, 5072 // characters are case folded per B.2 of [RFC3454] : U+01A9 5073 target[limit++] = 0x0283; 5074 break; 5075 5076 case 0x01AC: 5077 // For case ignore, numeric, and stored prefix string matching rules, 5078 // characters are case folded per B.2 of [RFC3454] : U+01AC 5079 target[limit++] = 0x01AD; 5080 break; 5081 5082 case 0x01AE: 5083 // For case ignore, numeric, and stored prefix string matching rules, 5084 // characters are case folded per B.2 of [RFC3454] : U+01AE 5085 target[limit++] = 0x0288; 5086 break; 5087 5088 case 0x01AF: 5089 // For case ignore, numeric, and stored prefix string matching rules, 5090 // characters are case folded per B.2 of [RFC3454] : U+01AF 5091 target[limit++] = 0x01B0; 5092 break; 5093 5094 case 0x01B1: 5095 case 0x01B2: 5096 // For case ignore, numeric, and stored prefix string matching rules, 5097 // characters are case folded per B.2 of [RFC3454] : U+01AF, U+01B2 5098 target[limit++] = ( char ) ( c + 0x00D9 ); 5099 break; 5100 5101 case 0x01B3: 5102 case 0x01B5: 5103 // For case ignore, numeric, and stored prefix string matching rules, 5104 // characters are case folded per B.2 of [RFC3454] : U+01B3, U+01B5 5105 target[limit++] = ( char ) ( c + 0x0001 ); 5106 break; 5107 5108 case 0x01B7: 5109 // For case ignore, numeric, and stored prefix string matching rules, 5110 // characters 
are case folded per B.2 of [RFC3454] : U+01B7 5111 target[limit++] = 0x0292; 5112 break; 5113 5114 case 0x01B8: 5115 case 0x01BC: 5116 // For case ignore, numeric, and stored prefix string matching rules, 5117 // characters are case folded per B.2 of [RFC3454] : U+01B8, U+01BC 5118 target[limit++] = ( char ) ( c + 0x0001 ); 5119 break; 5120 5121 case 0x01C4: 5122 // For case ignore, numeric, and stored prefix string matching rules, 5123 // characters are case folded per B.2 of [RFC3454] : U+01C4,U+01C5 5124 target[limit++] = 0x01C6; 5125 break; 5126 5127 case 0x01C7: 5128 // For case ignore, numeric, and stored prefix string matching rules, 5129 // characters are case folded per B.2 of [RFC3454] : U+01C7,U+01C8 5130 target[limit++] = 0x01C9; 5131 break; 5132 5133 case 0x01CA: 5134 case 0x01CB: 5135 // For case ignore, numeric, and stored prefix string matching rules, 5136 // characters are case folded per B.2 of [RFC3454] : U+01CA,U+01CB 5137 target[limit++] = 0x01CC; 5138 break; 5139 5140 case 0x01CD: 5141 case 0x01CF: 5142 case 0x01D1: 5143 case 0x01D3: 5144 case 0x01D5: 5145 case 0x01D7: 5146 case 0x01D9: 5147 case 0x01DB: 5148 case 0x01DE: 5149 case 0x01E0: 5150 case 0x01E2: 5151 case 0x01E4: 5152 case 0x01E6: 5153 case 0x01E8: 5154 case 0x01EA: 5155 case 0x01EC: 5156 case 0x01EE: 5157 // For case ignore, numeric, and stored prefix string matching rules, 5158 // characters are case folded per B.2 of [RFC3454] : U+01CD, U+01EE 5159 target[limit++] = ( char ) ( c + 0x0001 ); 5160 break; 5161 5162 case 0x01F0: 5163 // For case ignore, numeric, and stored prefix string matching rules, 5164 // characters are case folded per B.2 of [RFC3454] : U+01F0 5165 target[limit++] = 0x006A; 5166 target[limit++] = 0x030C; 5167 break; 5168 5169 case 0x01F1: 5170 case 0x01F2: 5171 // For case ignore, numeric, and stored prefix string matching rules, 5172 // characters are case folded per B.2 of [RFC3454] : U+01F1, U+01F2 5173 target[limit++] = 0x01F3; 5174 break; 5175 5176 case 
0x01F4: 5177 // For case ignore, numeric, and stored prefix string matching rules, 5178 // characters are case folded per B.2 of [RFC3454] : U+01F4 5179 target[limit++] = 0x01F5; 5180 break; 5181 5182 case 0x01F6: 5183 // For case ignore, numeric, and stored prefix string matching rules, 5184 // characters are case folded per B.2 of [RFC3454] : U+01F6 5185 target[limit++] = 0x0195; 5186 break; 5187 5188 case 0x01F7: 5189 // For case ignore, numeric, and stored prefix string matching rules, 5190 // characters are case folded per B.2 of [RFC3454] : U+01F7 5191 target[limit++] = 0x01BF; 5192 break; 5193 5194 case 0x01F8: 5195 case 0x01FA: 5196 case 0x01FC: 5197 case 0x01FE: 5198 case 0x0200: 5199 case 0x0202: 5200 case 0x0204: 5201 case 0x0206: 5202 case 0x0208: 5203 case 0x020A: 5204 case 0x020C: 5205 case 0x020E: 5206 case 0x0210: 5207 case 0x0212: 5208 case 0x0214: 5209 case 0x0216: 5210 case 0x0218: 5211 case 0x021A: 5212 case 0x021C: 5213 case 0x021E: 5214 // For case ignore, numeric, and stored prefix string matching rules, 5215 // characters are case folded per B.2 of [RFC3454] : U+01F8-U+021E 5216 target[limit++] = ( char ) ( c + 0x0001 ); 5217 break; 5218 5219 5220 case 0x0220: 5221 // For case ignore, numeric, and stored prefix string matching rules, 5222 // characters are case folded per B.2 of [RFC3454] : U+0220 5223 target[limit++] = 0x019E; 5224 break; 5225 5226 case 0x0222: 5227 case 0x0224: 5228 case 0x0226: 5229 case 0x0228: 5230 case 0x022A: 5231 case 0x022C: 5232 case 0x022E: 5233 case 0x0230: 5234 case 0x0232: 5235 // For case ignore, numeric, and stored prefix string matching rules, 5236 // characters are case folded per B.2 of [RFC3454] : U+0222-U+0232 5237 target[limit++] = ( char ) ( c + 0x0001 ); 5238 break; 5239 5240 case 0x0345: 5241 // For case ignore, numeric, and stored prefix string matching rules, 5242 // characters are case folded per B.2 of [RFC3454] : U+0220 5243 target[limit++] = 0x03B9; 5244 break; 5245 5246 case 0x034F: 5247 // 
SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 5248 // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and 5249 // VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also 5250 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 5251 // mapped to nothing. 5252 break; 5253 5254 case 0x037A: 5255 // For case ignore, numeric, and stored prefix string matching rules, 5256 // characters are case folded per B.2 of [RFC3454] : U+037A 5257 target[limit++] = 0x0020; 5258 target[limit++] = 0x03B9; 5259 break; 5260 5261 case 0x0386: 5262 // For case ignore, numeric, and stored prefix string matching rules, 5263 // characters are case folded per B.2 of [RFC3454] : U+0386 5264 target[limit++] = 0x03AC; 5265 break; 5266 5267 case 0x0388: 5268 case 0x0389: 5269 case 0x038A: 5270 // For case ignore, numeric, and stored prefix string matching rules, 5271 // characters are case folded per B.2 of [RFC3454] : U+0388, U+0389, U+038A 5272 target[limit++] = ( char ) ( c + 0x0025 ); 5273 break; 5274 5275 case 0x038C: 5276 // For case ignore, numeric, and stored prefix string matching rules, 5277 // characters are case folded per B.2 of [RFC3454] : U+038C 5278 target[limit++] = 0x03CC; 5279 break; 5280 5281 case 0x038E: 5282 case 0x038F: 5283 // For case ignore, numeric, and stored prefix string matching rules, 5284 // characters are case folded per B.2 of [RFC3454] : U+038E, U+038F 5285 target[limit++] = ( char ) ( c + 0x0025 ); 5286 break; 5287 5288 case 0x0390: 5289 // For case ignore, numeric, and stored prefix string matching rules, 5290 // characters are case folded per B.2 of [RFC3454] : U+0390 5291 target[limit++] = 0x03B9; 5292 target[limit++] = 0x0308; 5293 target[limit++] = 0x0301; 5294 break; 5295 5296 case 0x0391: 5297 case 0x0392: 5298 case 0x0393: 5299 case 0x0394: 5300 case 0x0395: 5301 case 0x0396: 5302 case 0x0397: 5303 case 0x0398: 5304 case 0x0399: 5305 case 0x039A: 5306 case 0x039B: 5307 case 0x039C: 5308 case 
0x039D: 5309 case 0x039E: 5310 case 0x039F: 5311 case 0x03A0: 5312 case 0x03A1: 5313 case 0x03A3: 5314 case 0x03A4: 5315 case 0x03A5: 5316 case 0x03A6: 5317 case 0x03A7: 5318 case 0x03A8: 5319 case 0x03A9: 5320 case 0x03AA: 5321 case 0x03AB: 5322 // For case ignore, numeric, and stored prefix string matching rules, 5323 // characters are case folded per B.2 of [RFC3454] : U+0391-U+03AB 5324 target[limit++] = ( char ) ( c + 0x0020 ); 5325 break; 5326 5327 5328 case 0x03B0: 5329 // For case ignore, numeric, and stored prefix string matching rules, 5330 // characters are case folded per B.2 of [RFC3454] : U+03B0 5331 target[limit++] = 0x03C5; 5332 target[limit++] = 0x0308; 5333 target[limit++] = 0x0301; 5334 break; 5335 5336 case 0x03C2: 5337 // For case ignore, numeric, and stored prefix string matching rules, 5338 // characters are case folded per B.2 of [RFC3454] : U+03C2 5339 target[limit++] = 0x03C3; 5340 break; 5341 5342 case 0x03D0: 5343 // For case ignore, numeric, and stored prefix string matching rules, 5344 // characters are case folded per B.2 of [RFC3454] : U+03D0 5345 target[limit++] = 0x03B2; 5346 break; 5347 5348 case 0x03D1: 5349 // For case ignore, numeric, and stored prefix string matching rules, 5350 // characters are case folded per B.2 of [RFC3454] : U+03D1 5351 target[limit++] = 0x03B8; 5352 break; 5353 5354 case 0x03D2: 5355 // For case ignore, numeric, and stored prefix string matching rules, 5356 // characters are case folded per B.2 of [RFC3454] : U+03D2 5357 target[limit++] = 0x03C5; 5358 break; 5359 5360 case 0x03D3: 5361 // For case ignore, numeric, and stored prefix string matching rules, 5362 // characters are case folded per B.2 of [RFC3454] : U+03D3 5363 target[limit++] = 0x03CD; 5364 break; 5365 5366 case 0x03D4: 5367 // For case ignore, numeric, and stored prefix string matching rules, 5368 // characters are case folded per B.2 of [RFC3454] : U+03D4 5369 target[limit++] = 0x03CB; 5370 break; 5371 5372 case 0x03D5: 5373 // For case 
ignore, numeric, and stored prefix string matching rules, 5374 // characters are case folded per B.2 of [RFC3454] : U+03D5 5375 target[limit++] = 0x03C6; 5376 break; 5377 5378 case 0x03D6: 5379 // For case ignore, numeric, and stored prefix string matching rules, 5380 // characters are case folded per B.2 of [RFC3454] : U+03D6 5381 target[limit++] = 0x03C0; 5382 break; 5383 5384 case 0x03D8: 5385 case 0x03DA: 5386 case 0x03DC: 5387 case 0x03DE: 5388 case 0x03E0: 5389 case 0x03E2: 5390 case 0x03E4: 5391 case 0x03E6: 5392 case 0x03E8: 5393 case 0x03EA: 5394 case 0x03EC: 5395 case 0x03EE: 5396 // For case ignore, numeric, and stored prefix string matching rules, 5397 // characters are case folded per B.2 of [RFC3454] : U+03D8-U+03EE 5398 target[limit++] = ( char ) ( c + 0x0001 ); 5399 break; 5400 5401 case 0x03F0: 5402 // For case ignore, numeric, and stored prefix string matching rules, 5403 // characters are case folded per B.2 of [RFC3454] : U+03F0 5404 target[limit++] = 0x03BA; 5405 break; 5406 5407 case 0x03F1: 5408 // For case ignore, numeric, and stored prefix string matching rules, 5409 // characters are case folded per B.2 of [RFC3454] : U+03F1 5410 target[limit++] = 0x03C1; 5411 break; 5412 5413 case 0x03F2: 5414 // For case ignore, numeric, and stored prefix string matching rules, 5415 // characters are case folded per B.2 of [RFC3454] : U+03F2 5416 target[limit++] = 0x03C3; 5417 break; 5418 5419 case 0x03F4: 5420 // For case ignore, numeric, and stored prefix string matching rules, 5421 // characters are case folded per B.2 of [RFC3454] : U+03F4 5422 target[limit++] = 0x03B8; 5423 break; 5424 5425 case 0x03F5: 5426 // For case ignore, numeric, and stored prefix string matching rules, 5427 // characters are case folded per B.2 of [RFC3454] : U+03F5 5428 target[limit++] = 0x03B5; 5429 break; 5430 5431 case 0x0400: 5432 case 0x0401: 5433 case 0x0402: 5434 case 0x0403: 5435 case 0x0404: 5436 case 0x0405: 5437 case 0x0406: 5438 case 0x0407: 5439 case 0x0408: 
5440 case 0x0409: 5441 case 0x040A: 5442 case 0x040B: 5443 case 0x040C: 5444 case 0x040D: 5445 case 0x040E: 5446 case 0x040F: 5447 // For case ignore, numeric, and stored prefix string matching rules, 5448 // characters are case folded per B.2 of [RFC3454] : U+0400-U+040F 5449 target[limit++] = ( char ) ( c + 0x0050 ); 5450 break; 5451 5452 case 0x0410: 5453 case 0x0411: 5454 case 0x0412: 5455 case 0x0413: 5456 case 0x0414: 5457 case 0x0415: 5458 case 0x0416: 5459 case 0x0417: 5460 case 0x0418: 5461 case 0x0419: 5462 case 0x041A: 5463 case 0x041B: 5464 case 0x041C: 5465 case 0x041D: 5466 case 0x041E: 5467 case 0x041F: 5468 case 0x0420: 5469 case 0x0421: 5470 case 0x0422: 5471 case 0x0423: 5472 case 0x0424: 5473 case 0x0425: 5474 case 0x0426: 5475 case 0x0427: 5476 case 0x0428: 5477 case 0x0429: 5478 case 0x042A: 5479 case 0x042B: 5480 case 0x042C: 5481 case 0x042D: 5482 case 0x042E: 5483 case 0x042F: 5484 // For case ignore, numeric, and stored prefix string matching rules, 5485 // characters are case folded per B.2 of [RFC3454] : U+0410-U+042F 5486 target[limit++] = ( char ) ( c + 0x0020 ); 5487 break; 5488 5489 case 0x0460: 5490 case 0x0462: 5491 case 0x0464: 5492 case 0x0466: 5493 case 0x0468: 5494 case 0x046A: 5495 case 0x046C: 5496 case 0x046E: 5497 case 0x0470: 5498 case 0x0472: 5499 case 0x0474: 5500 case 0x0476: 5501 case 0x0478: 5502 case 0x047A: 5503 case 0x047C: 5504 case 0x047E: 5505 case 0x0480: 5506 case 0x048A: 5507 case 0x048C: 5508 case 0x048E: 5509 case 0x0490: 5510 case 0x0492: 5511 case 0x0494: 5512 case 0x0496: 5513 case 0x0498: 5514 case 0x049A: 5515 case 0x049C: 5516 case 0x049E: 5517 case 0x04A0: 5518 case 0x04A2: 5519 case 0x04A4: 5520 case 0x04A6: 5521 case 0x04A8: 5522 case 0x04AA: 5523 case 0x04AC: 5524 case 0x04AE: 5525 case 0x04B0: 5526 case 0x04B2: 5527 case 0x04B4: 5528 case 0x04B6: 5529 case 0x04B8: 5530 case 0x04BA: 5531 case 0x04BC: 5532 case 0x04BE: 5533 case 0x04C1: 5534 case 0x04C3: 5535 case 0x04C5: 5536 case 0x04C7: 5537 case 
0x04C9: 5538 case 0x04CB: 5539 case 0x04CD: 5540 case 0x04D0: 5541 case 0x04D2: 5542 case 0x04D4: 5543 case 0x04D6: 5544 case 0x04D8: 5545 case 0x04DA: 5546 case 0x04DC: 5547 case 0x04DE: 5548 case 0x04E0: 5549 case 0x04E2: 5550 case 0x04E4: 5551 case 0x04E6: 5552 case 0x04E8: 5553 case 0x04EA: 5554 case 0x04EC: 5555 case 0x04EE: 5556 case 0x04F0: 5557 case 0x04F2: 5558 case 0x04F4: 5559 case 0x04F8: 5560 case 0x0500: 5561 case 0x0502: 5562 case 0x0504: 5563 case 0x0506: 5564 case 0x0508: 5565 case 0x050A: 5566 case 0x050C: 5567 case 0x050E: 5568 // For case ignore, numeric, and stored prefix string matching rules, 5569 // characters are case folded per B.2 of [RFC3454] : U+0460-U+050E 5570 target[limit++] = ( char ) ( c + 0x0001 ); 5571 break; 5572 5573 case 0x0531: 5574 case 0x0532: 5575 case 0x0533: 5576 case 0x0534: 5577 case 0x0535: 5578 case 0x0536: 5579 case 0x0537: 5580 case 0x0538: 5581 case 0x0539: 5582 case 0x053A: 5583 case 0x053B: 5584 case 0x053C: 5585 case 0x053D: 5586 case 0x053E: 5587 case 0x053F: 5588 case 0x0540: 5589 case 0x0541: 5590 case 0x0542: 5591 case 0x0543: 5592 case 0x0544: 5593 case 0x0545: 5594 case 0x0546: 5595 case 0x0547: 5596 case 0x0548: 5597 case 0x0549: 5598 case 0x054A: 5599 case 0x054B: 5600 case 0x054C: 5601 case 0x054D: 5602 case 0x054E: 5603 case 0x054F: 5604 case 0x0550: 5605 case 0x0551: 5606 case 0x0552: 5607 case 0x0553: 5608 case 0x0554: 5609 case 0x0555: 5610 case 0x0556: 5611 // For case ignore, numeric, and stored prefix string matching rules, 5612 // characters are case folded per B.2 of [RFC3454] : U+0531-U+0556 5613 target[limit++] = ( char ) ( c + 0x0030 ); 5614 break; 5615 5616 5617 case 0x0587: 5618 // For case ignore, numeric, and stored prefix string matching rules, 5619 // characters are case folded per B.2 of [RFC3454] : U+0587 5620 target[limit++] = 0x0565; 5621 target[limit++] = 0x0582; 5622 break; 5623 5624 case 0x06DD: 5625 case 0x070F: 5626 // All other control code (e.g., Cc) points or code points 
with a 5627 // control function (e.g., Cf) are mapped to nothing. The following is 5628 // a complete list of these code points: ... U+06DD-070F... 5629 break; 5630 5631 case 0x1680: 5632 // All other code points with Separator (space, line, or paragraph) property 5633 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 5634 // list of these code points: ...1680... 5635 target[limit++] = 0x0020; 5636 break; 5637 5638 case 0x1806: 5639 // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 5640 // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and 5641 // VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also 5642 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 5643 // mapped to nothing. 5644 break; 5645 5646 case 0x180B: 5647 case 0x180C: 5648 case 0x180D: 5649 // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 5650 // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and 5651 // VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also 5652 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 5653 // mapped to nothing. 5654 break; 5655 5656 case 0x180E: 5657 // All other control code (e.g., Cc) points or code points with a 5658 // control function (e.g., Cf) are mapped to nothing. The following is 5659 // a complete list of these code points: ... U+180E... 
5660 break; 5661 5662 case 0x1E00: 5663 case 0x1E02: 5664 case 0x1E04: 5665 case 0x1E06: 5666 case 0x1E08: 5667 case 0x1E0A: 5668 case 0x1E0C: 5669 case 0x1E0E: 5670 case 0x1E10: 5671 case 0x1E12: 5672 case 0x1E14: 5673 case 0x1E16: 5674 case 0x1E18: 5675 case 0x1E1A: 5676 case 0x1E1C: 5677 case 0x1E1E: 5678 case 0x1E20: 5679 case 0x1E22: 5680 case 0x1E24: 5681 case 0x1E26: 5682 case 0x1E28: 5683 case 0x1E2A: 5684 case 0x1E2C: 5685 case 0x1E2E: 5686 case 0x1E30: 5687 case 0x1E32: 5688 case 0x1E34: 5689 case 0x1E36: 5690 case 0x1E38: 5691 case 0x1E3A: 5692 case 0x1E3C: 5693 case 0x1E3E: 5694 case 0x1E40: 5695 case 0x1E42: 5696 case 0x1E44: 5697 case 0x1E46: 5698 case 0x1E48: 5699 case 0x1E4A: 5700 case 0x1E4C: 5701 case 0x1E4E: 5702 case 0x1E50: 5703 case 0x1E52: 5704 case 0x1E54: 5705 case 0x1E56: 5706 case 0x1E58: 5707 case 0x1E5A: 5708 case 0x1E5C: 5709 case 0x1E5E: 5710 case 0x1E60: 5711 case 0x1E62: 5712 case 0x1E64: 5713 case 0x1E66: 5714 case 0x1E68: 5715 case 0x1E6A: 5716 case 0x1E6C: 5717 case 0x1E6E: 5718 case 0x1E70: 5719 case 0x1E72: 5720 case 0x1E74: 5721 case 0x1E76: 5722 case 0x1E78: 5723 case 0x1E7A: 5724 case 0x1E7C: 5725 case 0x1E7E: 5726 case 0x1E80: 5727 case 0x1E82: 5728 case 0x1E84: 5729 case 0x1E86: 5730 case 0x1E88: 5731 case 0x1E8A: 5732 case 0x1E8C: 5733 case 0x1E8E: 5734 case 0x1E90: 5735 case 0x1E92: 5736 case 0x1E94: 5737 // For case ignore, numeric, and stored prefix string matching rules, 5738 // characters are case folded per B.2 of [RFC3454] : U+1E00-U+1E94 5739 target[limit++] = ( char ) ( c + 0x0001 ); 5740 break; 5741 5742 case 0x1E96: 5743 // For case ignore, numeric, and stored prefix string matching rules, 5744 // characters are case folded per B.2 of [RFC3454] : U+1E96 5745 target[limit++] = 0x0068; 5746 target[limit++] = 0x0331; 5747 break; 5748 5749 case 0x1E97: 5750 // For case ignore, numeric, and stored prefix string matching rules, 5751 // characters are case folded per B.2 of [RFC3454] : U+1E97 5752 target[limit++] = 
0x0074; 5753 target[limit++] = 0x0308; 5754 break; 5755 5756 case 0x1E98: 5757 // For case ignore, numeric, and stored prefix string matching rules, 5758 // characters are case folded per B.2 of [RFC3454] : U+1E98 5759 target[limit++] = 0x0077; 5760 target[limit++] = 0x030A; 5761 break; 5762 5763 case 0x1E99: 5764 // For case ignore, numeric, and stored prefix string matching rules, 5765 // characters are case folded per B.2 of [RFC3454] : U+1E99 5766 target[limit++] = 0x0079; 5767 target[limit++] = 0x030A; 5768 break; 5769 5770 case 0x1E9A: 5771 // For case ignore, numeric, and stored prefix string matching rules, 5772 // characters are case folded per B.2 of [RFC3454] : U+1E9A 5773 target[limit++] = 0x0061; 5774 target[limit++] = 0x02BE; 5775 break; 5776 5777 case 0x1E9B: 5778 // For case ignore, numeric, and stored prefix string matching rules, 5779 // characters are case folded per B.2 of [RFC3454] : U+1E9B 5780 target[limit++] = 0x1E61; 5781 break; 5782 5783 case 0x1EA0: 5784 case 0x1EA2: 5785 case 0x1EA4: 5786 case 0x1EA6: 5787 case 0x1EA8: 5788 case 0x1EAA: 5789 case 0x1EAC: 5790 case 0x1EAE: 5791 case 0x1EB0: 5792 case 0x1EB2: 5793 case 0x1EB4: 5794 case 0x1EB6: 5795 case 0x1EB8: 5796 case 0x1EBA: 5797 case 0x1EBC: 5798 case 0x1EBE: 5799 case 0x1EC0: 5800 case 0x1EC2: 5801 case 0x1EC4: 5802 case 0x1EC6: 5803 case 0x1EC8: 5804 case 0x1ECA: 5805 case 0x1ECC: 5806 case 0x1ECE: 5807 case 0x1ED0: 5808 case 0x1ED2: 5809 case 0x1ED4: 5810 case 0x1ED6: 5811 case 0x1ED8: 5812 case 0x1EDA: 5813 case 0x1EDC: 5814 case 0x1EDE: 5815 case 0x1EE0: 5816 case 0x1EE2: 5817 case 0x1EE4: 5818 case 0x1EE6: 5819 case 0x1EE8: 5820 case 0x1EEA: 5821 case 0x1EEC: 5822 case 0x1EEE: 5823 case 0x1EF0: 5824 case 0x1EF2: 5825 case 0x1EF4: 5826 case 0x1EF6: 5827 case 0x1EF8: 5828 // For case ignore, numeric, and stored prefix string matching rules, 5829 // characters are case folded per B.2 of [RFC3454] : U+1EA0-U+1EF8 5830 target[limit++] = ( char ) ( c + 0x0001 ); 5831 break; 5832 5833 
case 0x1F08: 5834 case 0x1F09: 5835 case 0x1F0A: 5836 case 0x1F0B: 5837 case 0x1F0C: 5838 case 0x1F0D: 5839 case 0x1F0E: 5840 case 0x1F0F: 5841 case 0x1F18: 5842 case 0x1F19: 5843 case 0x1F1A: 5844 case 0x1F1B: 5845 case 0x1F1C: 5846 case 0x1F1D: 5847 case 0x1F28: 5848 case 0x1F29: 5849 case 0x1F2A: 5850 case 0x1F2B: 5851 case 0x1F2C: 5852 case 0x1F2D: 5853 case 0x1F2E: 5854 case 0x1F2F: 5855 case 0x1F38: 5856 case 0x1F39: 5857 case 0x1F3A: 5858 case 0x1F3B: 5859 case 0x1F3C: 5860 case 0x1F3D: 5861 case 0x1F3E: 5862 case 0x1F3F: 5863 case 0x1F48: 5864 case 0x1F49: 5865 case 0x1F4A: 5866 case 0x1F4B: 5867 case 0x1F4C: 5868 case 0x1F4D: 5869 // For case ignore, numeric, and stored prefix string matching rules, 5870 // characters are case folded per B.2 of [RFC3454] : U+1F08-U+1F4D 5871 target[limit++] = ( char ) ( c - 0x0008 ); 5872 break; 5873 5874 case 0x1F50: 5875 // For case ignore, numeric, and stored prefix string matching rules, 5876 // characters are case folded per B.2 of [RFC3454] : U+1F50 5877 target[limit++] = 0x03C5; 5878 target[limit++] = 0x0313; 5879 break; 5880 5881 case 0x1F52: 5882 // For case ignore, numeric, and stored prefix string matching rules, 5883 // characters are case folded per B.2 of [RFC3454] : U+1F52 5884 target[limit++] = 0x03C5; 5885 target[limit++] = 0x0313; 5886 target[limit++] = 0x0300; 5887 break; 5888 5889 case 0x1F54: 5890 // For case ignore, numeric, and stored prefix string matching rules, 5891 // characters are case folded per B.2 of [RFC3454] : U+1F54 5892 target[limit++] = 0x03C5; 5893 target[limit++] = 0x0313; 5894 target[limit++] = 0x0301; 5895 break; 5896 5897 case 0x1F56: 5898 // For case ignore, numeric, and stored prefix string matching rules, 5899 // characters are case folded per B.2 of [RFC3454] : U+1F56 5900 target[limit++] = 0x03C5; 5901 target[limit++] = 0x0313; 5902 target[limit++] = 0x0342; 5903 break; 5904 5905 case 0x1F59: 5906 case 0x1F5B: 5907 case 0x1F5D: 5908 case 0x1F5F: 5909 case 0x1F68: 5910 case 
0x1F69: 5911 case 0x1F6A: 5912 case 0x1F6B: 5913 case 0x1F6C: 5914 case 0x1F6D: 5915 case 0x1F6E: 5916 case 0x1F6F: 5917 // For case ignore, numeric, and stored prefix string matching rules, 5918 // characters are case folded per B.2 of [RFC3454] : U+1F59-U+1F6F 5919 target[limit++] = ( char ) ( c - 0x0008 ); 5920 break; 5921 5922 case 0x1F80: 5923 case 0x1F81: 5924 case 0x1F82: 5925 case 0x1F83: 5926 case 0x1F84: 5927 case 0x1F85: 5928 case 0x1F86: 5929 case 0x1F87: 5930 // For case ignore, numeric, and stored prefix string matching rules, 5931 // characters are case folded per B.2 of [RFC3454] : U+1F80-U+1F87 5932 target[limit++] = ( char ) ( c - 0x0080 ); 5933 target[limit++] = 0x03B9; 5934 break; 5935 5936 case 0x1F88: 5937 case 0x1F89: 5938 case 0x1F8A: 5939 case 0x1F8B: 5940 case 0x1F8C: 5941 case 0x1F8D: 5942 case 0x1F8E: 5943 case 0x1F8F: 5944 // For case ignore, numeric, and stored prefix string matching rules, 5945 // characters are case folded per B.2 of [RFC3454] : U+1F88-U+1F8F 5946 target[limit++] = ( char ) ( c - 0x0088 ); 5947 target[limit++] = 0x03B9; 5948 break; 5949 5950 case 0x1F90: 5951 case 0x1F91: 5952 case 0x1F92: 5953 case 0x1F93: 5954 case 0x1F94: 5955 case 0x1F95: 5956 case 0x1F96: 5957 case 0x1F97: 5958 // For case ignore, numeric, and stored prefix string matching rules, 5959 // characters are case folded per B.2 of [RFC3454] : U+1F90-U+1F97 5960 target[limit++] = ( char ) ( c - 0x0070 ); 5961 target[limit++] = 0x03B9; 5962 break; 5963 5964 case 0x1F98: 5965 case 0x1F99: 5966 case 0x1F9A: 5967 case 0x1F9B: 5968 case 0x1F9C: 5969 case 0x1F9D: 5970 case 0x1F9E: 5971 case 0x1F9F: 5972 // For case ignore, numeric, and stored prefix string matching rules, 5973 // characters are case folded per B.2 of [RFC3454] : U+1F98-U+1F9F 5974 target[limit++] = ( char ) ( c - 0x0078 ); 5975 target[limit++] = 0x03B9; 5976 break; 5977 5978 case 0x1FA0: 5979 case 0x1FA1: 5980 case 0x1FA2: 5981 case 0x1FA3: 5982 case 0x1FA4: 5983 case 0x1FA5: 5984 case 
0x1FA6: 5985 case 0x1FA7: 5986 // For case ignore, numeric, and stored prefix string matching rules, 5987 // characters are case folded per B.2 of [RFC3454] : U+1FA0-U+1FA7 5988 target[limit++] = ( char ) ( c - 0x0040 ); 5989 target[limit++] = 0x03B9; 5990 break; 5991 5992 case 0x1FA8: 5993 case 0x1FA9: 5994 case 0x1FAA: 5995 case 0x1FAB: 5996 case 0x1FAC: 5997 case 0x1FAD: 5998 case 0x1FAE: 5999 case 0x1FAF: 6000 // For case ignore, numeric, and stored prefix string matching rules, 6001 // characters are case folded per B.2 of [RFC3454] : U+1FA8-U+1FAF 6002 target[limit++] = ( char ) ( c - 0x0048 ); 6003 target[limit++] = 0x03B9; 6004 break; 6005 6006 case 0x1FB2: 6007 // For case ignore, numeric, and stored prefix string matching rules, 6008 // characters are case folded per B.2 of [RFC3454] : U+1FB2 6009 target[limit++] = 0x1F70; 6010 target[limit++] = 0x03B9; 6011 break; 6012 6013 case 0x1FB3: 6014 // For case ignore, numeric, and stored prefix string matching rules, 6015 // characters are case folded per B.2 of [RFC3454] : U+1FB3 6016 target[limit++] = 0x03B1; 6017 target[limit++] = 0x03B9; 6018 break; 6019 6020 case 0x1FB4: 6021 // For case ignore, numeric, and stored prefix string matching rules, 6022 // characters are case folded per B.2 of [RFC3454] : U+1FB4 6023 target[limit++] = 0x03AC; 6024 target[limit++] = 0x03B9; 6025 break; 6026 6027 case 0x1FB6: 6028 // For case ignore, numeric, and stored prefix string matching rules, 6029 // characters are case folded per B.2 of [RFC3454] : U+1FB6 6030 target[limit++] = 0x03B1; 6031 target[limit++] = 0x0342; 6032 break; 6033 6034 case 0x1FB7: 6035 // For case ignore, numeric, and stored prefix string matching rules, 6036 // characters are case folded per B.2 of [RFC3454] : U+1FB7 6037 target[limit++] = 0x03B1; 6038 target[limit++] = 0x0342; 6039 target[limit++] = 0x03B9; 6040 break; 6041 6042 case 0x1FB8: 6043 case 0x1FB9: 6044 // For case ignore, numeric, and stored prefix string matching rules, 6045 // 
characters are case folded per B.2 of [RFC3454] : U+1FB8,U+1FB9 6046 target[limit++] = ( char ) ( c - 0x0008 ); 6047 break; 6048 6049 case 0x1FBA: 6050 case 0x1FBB: 6051 // For case ignore, numeric, and stored prefix string matching rules, 6052 // characters are case folded per B.2 of [RFC3454] : U+1FBA,U+1FBB 6053 target[limit++] = ( char ) ( c - 0x004A ); 6054 target[limit++] = 0x1F70; 6055 break; 6056 6057 case 0x1FBC: 6058 // For case ignore, numeric, and stored prefix string matching rules, 6059 // characters are case folded per B.2 of [RFC3454] : U+1FBC 6060 target[limit++] = 0x03B1; 6061 target[limit++] = 0x03B9; 6062 break; 6063 6064 case 0x1FBE: 6065 // For case ignore, numeric, and stored prefix string matching rules, 6066 // characters are case folded per B.2 of [RFC3454] : U+1FBE 6067 target[limit++] = 0x03B9; 6068 break; 6069 6070 case 0x1FC2: 6071 // For case ignore, numeric, and stored prefix string matching rules, 6072 // characters are case folded per B.2 of [RFC3454] : U+1FC2 6073 target[limit++] = 0x1F74; 6074 target[limit++] = 0x03B9; 6075 break; 6076 6077 case 0x1FC3: 6078 // For case ignore, numeric, and stored prefix string matching rules, 6079 // characters are case folded per B.2 of [RFC3454] : U+1FC3 6080 target[limit++] = 0x03B7; 6081 target[limit++] = 0x03B9; 6082 break; 6083 6084 case 0x1FC4: 6085 // For case ignore, numeric, and stored prefix string matching rules, 6086 // characters are case folded per B.2 of [RFC3454] : U+1FC4 6087 target[limit++] = 0x03AE; 6088 target[limit++] = 0x03B9; 6089 break; 6090 6091 case 0x1FC6: 6092 // For case ignore, numeric, and stored prefix string matching rules, 6093 // characters are case folded per B.2 of [RFC3454] : U+1FC6 6094 target[limit++] = 0x03B7; 6095 target[limit++] = 0x0342; 6096 break; 6097 6098 case 0x1FC7: 6099 // For case ignore, numeric, and stored prefix string matching rules, 6100 // characters are case folded per B.2 of [RFC3454] : U+1FC7 6101 target[limit++] = 0x03B7; 6102 
target[limit++] = 0x0342; 6103 target[limit++] = 0x03B9; 6104 break; 6105 6106 case 0x1FC8: 6107 case 0x1FC9: 6108 case 0x1FCA: 6109 case 0x1FCB: 6110 // For case ignore, numeric, and stored prefix string matching rules, 6111 // characters are case folded per B.2 of [RFC3454] : U+1FC8-U+01FCB 6112 target[limit++] = ( char ) ( c - 0x0056 ); 6113 target[limit++] = 0x1F72; 6114 break; 6115 6116 case 0x1FCC: 6117 // For case ignore, numeric, and stored prefix string matching rules, 6118 // characters are case folded per B.2 of [RFC3454] : U+1FCC 6119 target[limit++] = 0x03B7; 6120 target[limit++] = 0x03B9; 6121 break; 6122 6123 case 0x1FD2: 6124 // For case ignore, numeric, and stored prefix string matching rules, 6125 // characters are case folded per B.2 of [RFC3454] : U+1FD2 6126 target[limit++] = 0x03B9; 6127 target[limit++] = 0x0308; 6128 target[limit++] = 0x0300; 6129 break; 6130 6131 case 0x1FD3: 6132 // For case ignore, numeric, and stored prefix string matching rules, 6133 // characters are case folded per B.2 of [RFC3454] : U+1FD3 6134 target[limit++] = 0x03B9; 6135 target[limit++] = 0x0308; 6136 target[limit++] = 0x0301; 6137 break; 6138 6139 case 0x1FD6: 6140 // For case ignore, numeric, and stored prefix string matching rules, 6141 // characters are case folded per B.2 of [RFC3454] : U+1FD6 6142 target[limit++] = 0x03B9; 6143 target[limit++] = 0x0342; 6144 break; 6145 6146 case 0x1FD7: 6147 // For case ignore, numeric, and stored prefix string matching rules, 6148 // characters are case folded per B.2 of [RFC3454] : U+1FD7 6149 target[limit++] = 0x03B9; 6150 target[limit++] = 0x0308; 6151 target[limit++] = 0x0342; 6152 break; 6153 6154 case 0x1FD8: 6155 case 0x1FD9: 6156 // For case ignore, numeric, and stored prefix string matching rules, 6157 // characters are case folded per B.2 of [RFC3454] : U+1FD8-U+01FD9 6158 target[limit++] = ( char ) ( c - 0x0008 ); 6159 break; 6160 6161 case 0x1FDA: 6162 case 0x1FDB: 6163 // For case ignore, numeric, and stored 
prefix string matching rules, 6164 // characters are case folded per B.2 of [RFC3454] : U+1FD8-U+01FD9 6165 target[limit++] = ( char ) ( c - 0x0064 ); 6166 break; 6167 6168 case 0x1FE2: 6169 // For case ignore, numeric, and stored prefix string matching rules, 6170 // characters are case folded per B.2 of [RFC3454] : U+1FE2 6171 target[limit++] = 0x03C5; 6172 target[limit++] = 0x0308; 6173 target[limit++] = 0x0300; 6174 break; 6175 6176 case 0x1FE3: 6177 // For case ignore, numeric, and stored prefix string matching rules, 6178 // characters are case folded per B.2 of [RFC3454] : U+1FE3 6179 target[limit++] = 0x03C5; 6180 target[limit++] = 0x0308; 6181 target[limit++] = 0x0301; 6182 break; 6183 6184 case 0x1FE4: 6185 // For case ignore, numeric, and stored prefix string matching rules, 6186 // characters are case folded per B.2 of [RFC3454] : U+1FE4 6187 target[limit++] = 0x03C1; 6188 target[limit++] = 0x0313; 6189 break; 6190 6191 case 0x1FE6: 6192 // For case ignore, numeric, and stored prefix string matching rules, 6193 // characters are case folded per B.2 of [RFC3454] : U+1FE6 6194 target[limit++] = 0x03C5; 6195 target[limit++] = 0x0342; 6196 break; 6197 6198 case 0x1FE7: 6199 // For case ignore, numeric, and stored prefix string matching rules, 6200 // characters are case folded per B.2 of [RFC3454] : U+1FE7 6201 target[limit++] = 0x03C5; 6202 target[limit++] = 0x0308; 6203 target[limit++] = 0x0342; 6204 break; 6205 6206 case 0x1FE8: 6207 case 0x1FE9: 6208 // For case ignore, numeric, and stored prefix string matching rules, 6209 // characters are case folded per B.2 of [RFC3454] : U+1FE8-U+01FE9 6210 target[limit++] = ( char ) ( c - 0x0008 ); 6211 break; 6212 6213 case 0x1FEA: 6214 case 0x1FEB: 6215 // For case ignore, numeric, and stored prefix string matching rules, 6216 // characters are case folded per B.2 of [RFC3454] : U+1FEA-U+01FEB 6217 target[limit++] = ( char ) ( c - 0x0070 ); 6218 break; 6219 6220 case 0x1FEC: 6221 // For case ignore, numeric, and 
stored prefix string matching rules, 6222 // characters are case folded per B.2 of [RFC3454] : U+1FEC 6223 target[limit++] = 0x1FE5; 6224 break; 6225 6226 case 0x1FF2: 6227 // For case ignore, numeric, and stored prefix string matching rules, 6228 // characters are case folded per B.2 of [RFC3454] : U+1FF2 6229 target[limit++] = 0x1F7C; 6230 target[limit++] = 0x03B9; 6231 break; 6232 6233 case 0x1FF3: 6234 // For case ignore, numeric, and stored prefix string matching rules, 6235 // characters are case folded per B.2 of [RFC3454] : U+1FF3 6236 target[limit++] = 0x03C9; 6237 target[limit++] = 0x03B9; 6238 break; 6239 6240 case 0x1FF4: 6241 // For case ignore, numeric, and stored prefix string matching rules, 6242 // characters are case folded per B.2 of [RFC3454] : U+1FF4 6243 target[limit++] = 0x03CE; 6244 target[limit++] = 0x03B9; 6245 break; 6246 6247 case 0x1FF6: 6248 // For case ignore, numeric, and stored prefix string matching rules, 6249 // characters are case folded per B.2 of [RFC3454] : U+1FF6 6250 target[limit++] = 0x03C9; 6251 target[limit++] = 0x0342; 6252 break; 6253 6254 case 0x1FF7: 6255 // For case ignore, numeric, and stored prefix string matching rules, 6256 // characters are case folded per B.2 of [RFC3454] : U+1FF7 6257 target[limit++] = 0x03C9; 6258 target[limit++] = 0x0342; 6259 target[limit++] = 0x03B9; 6260 break; 6261 6262 case 0x1FF8: 6263 case 0x1FF9: 6264 // For case ignore, numeric, and stored prefix string matching rules, 6265 // characters are case folded per B.2 of [RFC3454] : U+1FF8-U+01FF9 6266 target[limit++] = ( char ) ( c - 0x0080 ); 6267 break; 6268 6269 case 0x1FFA: 6270 case 0x1FFB: 6271 // For case ignore, numeric, and stored prefix string matching rules, 6272 // characters are case folded per B.2 of [RFC3454] : U+1FFA-U+01FFB 6273 target[limit++] = ( char ) ( c - 0x007E ); 6274 target[limit++] = 0x1F7C; 6275 break; 6276 6277 case 0x1FFC: 6278 // For case ignore, numeric, and stored prefix string matching rules, 6279 // 
characters are case folded per B.2 of [RFC3454] : U+1FFC 6280 target[limit++] = 0x03C9; 6281 target[limit++] = 0x03B9; 6282 break; 6283 6284 case 0x2000: 6285 case 0x2001: 6286 case 0x2002: 6287 case 0x2003: 6288 case 0x2004: 6289 case 0x2005: 6290 case 0x2006: 6291 case 0x2007: 6292 case 0x2008: 6293 case 0x2009: 6294 case 0x200A: 6295 // All other code points with Separator (space, line, or paragraph) property 6296 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 6297 // list of these code points: ...2000-200A... 6298 target[limit++] = 0x0020; 6299 break; 6300 6301 case 0x200B: 6302 // ZERO WIDTH SPACE (U+200B) is mapped to nothing. 6303 break; 6304 6305 case 0x200C: 6306 case 0x200D: 6307 case 0x200E: 6308 case 0x200F: 6309 // All other control code (e.g., Cc) points or code points with a 6310 // control function (e.g., Cf) are mapped to nothing. The following is 6311 // a complete list of these code points: ... U+200C-200FF... 6312 break; 6313 6314 case 0x2028: 6315 case 0x2029: 6316 // All other code points with Separator (space, line, or paragraph) property 6317 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 6318 // list of these code points: ... 2028-2029... 6319 target[limit++] = 0x0020; 6320 break; 6321 6322 case 0x202A: 6323 case 0x202B: 6324 case 0x202C: 6325 case 0x202D: 6326 case 0x202E: 6327 // All other control code (e.g., Cc) points or code points with a 6328 // control function (e.g., Cf) are mapped to nothing. The following is 6329 // a complete list of these code points: ... U+202A-202E... 6330 break; 6331 6332 case 0x202F: 6333 // All other code points with Separator (space, line, or paragraph) property 6334 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 6335 // list of these code points: ... 202F ... 
6336 target[limit++] = 0x0020; 6337 break; 6338 6339 case 0x205F: 6340 // All other code points with Separator (space, line, or paragraph) property 6341 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 6342 // list of these code points:...205F... 6343 target[limit++] = 0x0020; 6344 break; 6345 6346 case 0x2060: 6347 case 0x2061: 6348 case 0x2062: 6349 case 0x2063: 6350 // All other control code (e.g., Cc) points or code points with a 6351 // control function (e.g., Cf) are mapped to nothing. The following is 6352 // a complete list of these code points: ... U+2060-2063... 6353 break; 6354 6355 case 0x206A: 6356 case 0x206B: 6357 case 0x206C: 6358 case 0x206D: 6359 case 0x206E: 6360 case 0x206F: 6361 // All other control code (e.g., Cc) points or code points with a 6362 // control function (e.g., Cf) are mapped to nothing. The following is 6363 // a complete list of these code points: ... U+20GA-20GFF... 6364 break; 6365 6366 case 0x20A8: 6367 // For case ignore, numeric, and stored prefix string matching rules, 6368 // characters are case folded per B.2 of [RFC3454] : U+20A8 6369 target[limit++] = 0x0072; 6370 target[limit++] = 0x0073; 6371 break; 6372 6373 case 0x2102: 6374 // For case ignore, numeric, and stored prefix string matching rules, 6375 // characters are case folded per B.2 of [RFC3454] : U+2102 6376 target[limit++] = 0x0063; 6377 break; 6378 6379 case 0x2103: 6380 // For case ignore, numeric, and stored prefix string matching rules, 6381 // characters are case folded per B.2 of [RFC3454] : U+2103 6382 target[limit++] = 0x00B0; 6383 target[limit++] = 0x0063; 6384 break; 6385 6386 case 0x2107: 6387 // For case ignore, numeric, and stored prefix string matching rules, 6388 // characters are case folded per B.2 of [RFC3454] : U+2107 6389 target[limit++] = 0x025B; 6390 break; 6391 6392 case 0x2109: 6393 // For case ignore, numeric, and stored prefix string matching rules, 6394 // characters are case folded per B.2 of 
[RFC3454] : U+2109 6395 target[limit++] = 0x00B0; 6396 target[limit++] = 0x0066; 6397 break; 6398 6399 case 0x210B: 6400 // For case ignore, numeric, and stored prefix string matching rules, 6401 // characters are case folded per B.2 of [RFC3454] : U+210B 6402 target[limit++] = 0x0068; 6403 break; 6404 6405 case 0x210C: 6406 // For case ignore, numeric, and stored prefix string matching rules, 6407 // characters are case folded per B.2 of [RFC3454] : U+210C 6408 target[limit++] = 0x0068; 6409 break; 6410 6411 case 0x210D: 6412 // For case ignore, numeric, and stored prefix string matching rules, 6413 // characters are case folded per B.2 of [RFC3454] : U+210D 6414 target[limit++] = 0x0068; 6415 break; 6416 6417 case 0x2110: 6418 // For case ignore, numeric, and stored prefix string matching rules, 6419 // characters are case folded per B.2 of [RFC3454] : U+2110 6420 target[limit++] = 0x0069; 6421 break; 6422 6423 case 0x2111: 6424 // For case ignore, numeric, and stored prefix string matching rules, 6425 // characters are case folded per B.2 of [RFC3454] : U+2111 6426 target[limit++] = 0x0069; 6427 break; 6428 6429 case 0x2112: 6430 // For case ignore, numeric, and stored prefix string matching rules, 6431 // characters are case folded per B.2 of [RFC3454] : U+2112 6432 target[limit++] = 0x006C; 6433 break; 6434 6435 case 0x2115: 6436 // For case ignore, numeric, and stored prefix string matching rules, 6437 // characters are case folded per B.2 of [RFC3454] : U+2115 6438 target[limit++] = 0x006E; 6439 break; 6440 6441 case 0x2116: 6442 // For case ignore, numeric, and stored prefix string matching rules, 6443 // characters are case folded per B.2 of [RFC3454] : U+2116 6444 target[limit++] = 0x006E; 6445 target[limit++] = 0x006F; 6446 break; 6447 6448 case 0x2119: 6449 case 0x211A: 6450 case 0x211B: 6451 // For case ignore, numeric, and stored prefix string matching rules, 6452 // characters are case folded per B.2 of [RFC3454] : U+2119-U+211B 6453 target[limit++] 
= ( char ) ( c - 0x2A09 ); 6454 break; 6455 6456 case 0x211C: 6457 // For case ignore, numeric, and stored prefix string matching rules, 6458 // characters are case folded per B.2 of [RFC3454] : U+211C 6459 target[limit++] = 0x0072; 6460 break; 6461 6462 case 0x211D: 6463 // For case ignore, numeric, and stored prefix string matching rules, 6464 // characters are case folded per B.2 of [RFC3454] : U+211D 6465 target[limit++] = 0x0072; 6466 break; 6467 6468 case 0x2120: 6469 // For case ignore, numeric, and stored prefix string matching rules, 6470 // characters are case folded per B.2 of [RFC3454] : U+2120 6471 target[limit++] = 0x0073; 6472 target[limit++] = 0x006D; 6473 break; 6474 6475 case 0x2121: 6476 // For case ignore, numeric, and stored prefix string matching rules, 6477 // characters are case folded per B.2 of [RFC3454] : U+2121 6478 target[limit++] = 0x0074; 6479 target[limit++] = 0x0065; 6480 target[limit++] = 0x006C; 6481 break; 6482 6483 case 0x2122: 6484 // For case ignore, numeric, and stored prefix string matching rules, 6485 // characters are case folded per B.2 of [RFC3454] : U+2122 6486 target[limit++] = 0x0074; 6487 target[limit++] = 0x006D; 6488 break; 6489 6490 case 0x2124: 6491 // For case ignore, numeric, and stored prefix string matching rules, 6492 // characters are case folded per B.2 of [RFC3454] : U+2122 6493 target[limit++] = 0x007A; 6494 break; 6495 6496 case 0x2126: 6497 // For case ignore, numeric, and stored prefix string matching rules, 6498 // characters are case folded per B.2 of [RFC3454] : U+2122 6499 target[limit++] = 0x03C9; 6500 break; 6501 6502 case 0x2128: 6503 // For case ignore, numeric, and stored prefix string matching rules, 6504 // characters are case folded per B.2 of [RFC3454] : U+2122 6505 target[limit++] = 0x007A; 6506 break; 6507 6508 case 0x212A: 6509 // For case ignore, numeric, and stored prefix string matching rules, 6510 // characters are case folded per B.2 of [RFC3454] : U+2122 6511 target[limit++] = 
0x006B; 6512 break; 6513 6514 case 0x212B: 6515 // For case ignore, numeric, and stored prefix string matching rules, 6516 // characters are case folded per B.2 of [RFC3454] : U+2122 6517 target[limit++] = 0x00E5; 6518 break; 6519 6520 case 0x212C: 6521 case 0x212D: 6522 // For case ignore, numeric, and stored prefix string matching rules, 6523 // characters are case folded per B.2 of [RFC3454] : U+212C-U+212D 6524 target[limit++] = ( char ) ( c - 0x20CA ); 6525 break; 6526 6527 case 0x2130: 6528 case 0x2131: 6529 // For case ignore, numeric, and stored prefix string matching rules, 6530 // characters are case folded per B.2 of [RFC3454] : U+2130-U+2131 6531 target[limit++] = ( char ) ( c - 0x20CB ); 6532 break; 6533 6534 case 0x2133: 6535 // For case ignore, numeric, and stored prefix string matching rules, 6536 // characters are case folded per B.2 of [RFC3454] : U+2133 6537 target[limit++] = 0x006D; 6538 break; 6539 6540 case 0x213E: 6541 // For case ignore, numeric, and stored prefix string matching rules, 6542 // characters are case folded per B.2 of [RFC3454] : U+213E 6543 target[limit++] = 0x03B3; 6544 break; 6545 6546 case 0x213F: 6547 // For case ignore, numeric, and stored prefix string matching rules, 6548 // characters are case folded per B.2 of [RFC3454] : U+213F 6549 target[limit++] = 0x03C0; 6550 break; 6551 6552 case 0x2145: 6553 // For case ignore, numeric, and stored prefix string matching rules, 6554 // characters are case folded per B.2 of [RFC3454] : U+2145 6555 target[limit++] = 0x0064; 6556 break; 6557 6558 case 0x2160: 6559 case 0x2161: 6560 case 0x2162: 6561 case 0x2163: 6562 case 0x2164: 6563 case 0x2165: 6564 case 0x2166: 6565 case 0x2167: 6566 case 0x2168: 6567 case 0x2169: 6568 case 0x216A: 6569 case 0x216B: 6570 case 0x216C: 6571 case 0x216D: 6572 case 0x216E: 6573 case 0x216F: 6574 // For case ignore, numeric, and stored prefix string matching rules, 6575 // characters are case folded per B.2 of [RFC3454] : U+2160-U+216F 6576 
target[limit++] = ( char ) ( c + 0x0010 ); 6577 break; 6578 6579 case 0x24B6: 6580 case 0x24B7: 6581 case 0x24B8: 6582 case 0x24B9: 6583 case 0x24BA: 6584 case 0x24BB: 6585 case 0x24BC: 6586 case 0x24BD: 6587 case 0x24BE: 6588 case 0x24BF: 6589 case 0x24C0: 6590 case 0x24C1: 6591 case 0x24C2: 6592 case 0x24C3: 6593 case 0x24C4: 6594 case 0x24C5: 6595 case 0x24C6: 6596 case 0x24C7: 6597 case 0x24C8: 6598 case 0x24C9: 6599 case 0x24CA: 6600 case 0x24CB: 6601 case 0x24CC: 6602 case 0x24CD: 6603 case 0x24CE: 6604 case 0x24CF: 6605 // For case ignore, numeric, and stored prefix string matching rules, 6606 // characters are case folded per B.2 of [RFC3454] : U+24B6-U+24CF 6607 target[limit++] = ( char ) ( c + 0x001A ); 6608 break; 6609 6610 case 0x3000: 6611 // All other code points with Separator (space, line, or paragraph) property 6612 // (e.g., Zs, Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete 6613 // list of these code points: ...3000. 6614 target[limit++] = 0x0020; 6615 break; 6616 6617 case 0x3371: 6618 // For case ignore, numeric, and stored prefix string matching rules, 6619 // characters are case folded per B.2 of [RFC3454] : U+3371 6620 target[limit++] = 0x0068; 6621 target[limit++] = 0x0070; 6622 target[limit++] = 0x0061; 6623 break; 6624 6625 case 0x3373: 6626 // For case ignore, numeric, and stored prefix string matching rules, 6627 // characters are case folded per B.2 of [RFC3454] : U+3373 6628 target[limit++] = 0x0061; 6629 target[limit++] = 0x0075; 6630 break; 6631 6632 case 0x3375: 6633 // For case ignore, numeric, and stored prefix string matching rules, 6634 // characters are case folded per B.2 of [RFC3454] : U+3375 6635 target[limit++] = 0x006F; 6636 target[limit++] = 0x0076; 6637 break; 6638 6639 case 0x3380: 6640 // For case ignore, numeric, and stored prefix string matching rules, 6641 // characters are case folded per B.2 of [RFC3454] : U+3380 6642 target[limit++] = 0x0070; 6643 target[limit++] = 0x0061; 6644 break; 6645 
6646 case 0x3381: 6647 // For case ignore, numeric, and stored prefix string matching rules, 6648 // characters are case folded per B.2 of [RFC3454] : U+3381 6649 target[limit++] = 0x006E; 6650 target[limit++] = 0x0061; 6651 break; 6652 6653 case 0x3382: 6654 // For case ignore, numeric, and stored prefix string matching rules, 6655 // characters are case folded per B.2 of [RFC3454] : U+3382 6656 target[limit++] = 0x03BC; 6657 target[limit++] = 0x0061; 6658 break; 6659 6660 case 0x3383: 6661 // For case ignore, numeric, and stored prefix string matching rules, 6662 // characters are case folded per B.2 of [RFC3454] : U+3383 6663 target[limit++] = 0x006D; 6664 target[limit++] = 0x0061; 6665 break; 6666 6667 case 0x3384: 6668 // For case ignore, numeric, and stored prefix string matching rules, 6669 // characters are case folded per B.2 of [RFC3454] : U+3384 6670 target[limit++] = 0x006B; 6671 target[limit++] = 0x0061; 6672 break; 6673 6674 case 0x3385: 6675 // For case ignore, numeric, and stored prefix string matching rules, 6676 // characters are case folded per B.2 of [RFC3454] : U+3385 6677 target[limit++] = 0x006B; 6678 target[limit++] = 0x0062; 6679 break; 6680 6681 case 0x3386: 6682 // For case ignore, numeric, and stored prefix string matching rules, 6683 // characters are case folded per B.2 of [RFC3454] : U+3386 6684 target[limit++] = 0x006D; 6685 target[limit++] = 0x0062; 6686 break; 6687 6688 case 0x3387: 6689 // For case ignore, numeric, and stored prefix string matching rules, 6690 // characters are case folded per B.2 of [RFC3454] : U+3387 6691 target[limit++] = 0x0067; 6692 target[limit++] = 0x0062; 6693 break; 6694 6695 case 0x338A: 6696 // For case ignore, numeric, and stored prefix string matching rules, 6697 // characters are case folded per B.2 of [RFC3454] : U+338A 6698 target[limit++] = 0x0070; 6699 target[limit++] = 0x0066; 6700 break; 6701 6702 case 0x338B: 6703 // For case ignore, numeric, and stored prefix string matching rules, 6704 // 
characters are case folded per B.2 of [RFC3454] : U+338B 6705 target[limit++] = 0x006E; 6706 target[limit++] = 0x0066; 6707 break; 6708 6709 case 0x338C: 6710 // For case ignore, numeric, and stored prefix string matching rules, 6711 // characters are case folded per B.2 of [RFC3454] : U+338C 6712 target[limit++] = 0x03BC; 6713 target[limit++] = 0x0066; 6714 break; 6715 6716 case 0x3390: 6717 // For case ignore, numeric, and stored prefix string matching rules, 6718 // characters are case folded per B.2 of [RFC3454] : U+3390 6719 target[limit++] = 0x0068; 6720 target[limit++] = 0x007A; 6721 break; 6722 6723 case 0x3391: 6724 // For case ignore, numeric, and stored prefix string matching rules, 6725 // characters are case folded per B.2 of [RFC3454] : U+3391 6726 target[limit++] = 0x006B; 6727 target[limit++] = 0x0068; 6728 target[limit++] = 0x007A; 6729 break; 6730 6731 case 0x3392: 6732 // For case ignore, numeric, and stored prefix string matching rules, 6733 // characters are case folded per B.2 of [RFC3454] : U+3392 6734 target[limit++] = 0x006D; 6735 target[limit++] = 0x0068; 6736 target[limit++] = 0x007A; 6737 break; 6738 6739 case 0x3393: 6740 // For case ignore, numeric, and stored prefix string matching rules, 6741 // characters are case folded per B.2 of [RFC3454] : U+3393 6742 target[limit++] = 0x0067; 6743 target[limit++] = 0x0068; 6744 target[limit++] = 0x007A; 6745 break; 6746 6747 case 0x3394: 6748 // For case ignore, numeric, and stored prefix string matching rules, 6749 // characters are case folded per B.2 of [RFC3454] : U+3394 6750 target[limit++] = 0x0074; 6751 target[limit++] = 0x0068; 6752 target[limit++] = 0x007A; 6753 break; 6754 6755 case 0x33A9: 6756 // For case ignore, numeric, and stored prefix string matching rules, 6757 // characters are case folded per B.2 of [RFC3454] : U+33A9 6758 target[limit++] = 0x0070; 6759 target[limit++] = 0x0061; 6760 break; 6761 6762 case 0x33AA: 6763 // For case ignore, numeric, and stored prefix string 
matching rules, 6764 // characters are case folded per B.2 of [RFC3454] : U+33AA 6765 target[limit++] = 0x006B; 6766 target[limit++] = 0x0070; 6767 target[limit++] = 0x0061; 6768 break; 6769 6770 case 0x33AB: 6771 // For case ignore, numeric, and stored prefix string matching rules, 6772 // characters are case folded per B.2 of [RFC3454] : U+33AB 6773 target[limit++] = 0x006D; 6774 target[limit++] = 0x0070; 6775 target[limit++] = 0x0061; 6776 break; 6777 6778 case 0x33AC: 6779 // For case ignore, numeric, and stored prefix string matching rules, 6780 // characters are case folded per B.2 of [RFC3454] : U+33AC 6781 target[limit++] = 0x0067; 6782 target[limit++] = 0x0070; 6783 target[limit++] = 0x0061; 6784 break; 6785 6786 case 0x33B4: 6787 // For case ignore, numeric, and stored prefix string matching rules, 6788 // characters are case folded per B.2 of [RFC3454] : U+33B4 6789 target[limit++] = 0x0070; 6790 target[limit++] = 0x0076; 6791 break; 6792 6793 case 0x33B5: 6794 // For case ignore, numeric, and stored prefix string matching rules, 6795 // characters are case folded per B.2 of [RFC3454] : U+33B5 6796 target[limit++] = 0x006E; 6797 target[limit++] = 0x0076; 6798 break; 6799 6800 case 0x33B6: 6801 // For case ignore, numeric, and stored prefix string matching rules, 6802 // characters are case folded per B.2 of [RFC3454] : U+33B6 6803 target[limit++] = 0x03BC; 6804 target[limit++] = 0x0076; 6805 break; 6806 6807 case 0x33B7: 6808 // For case ignore, numeric, and stored prefix string matching rules, 6809 // characters are case folded per B.2 of [RFC3454] : U+33B7 6810 target[limit++] = 0x006D; 6811 target[limit++] = 0x0076; 6812 break; 6813 6814 case 0x33B8: 6815 // For case ignore, numeric, and stored prefix string matching rules, 6816 // characters are case folded per B.2 of [RFC3454] : U+33B8 6817 target[limit++] = 0x006B; 6818 target[limit++] = 0x0076; 6819 break; 6820 6821 case 0x33B9: 6822 // For case ignore, numeric, and stored prefix string matching 
rules, 6823 // characters are case folded per B.2 of [RFC3454] : U+33B9 6824 target[limit++] = 0x006D; 6825 target[limit++] = 0x0076; 6826 break; 6827 6828 case 0x33BA: 6829 // For case ignore, numeric, and stored prefix string matching rules, 6830 // characters are case folded per B.2 of [RFC3454] : U+33BA 6831 target[limit++] = 0x0070; 6832 target[limit++] = 0x0077; 6833 break; 6834 6835 case 0x33BB: 6836 // For case ignore, numeric, and stored prefix string matching rules, 6837 // characters are case folded per B.2 of [RFC3454] : U+33BB 6838 target[limit++] = 0x006E; 6839 target[limit++] = 0x0077; 6840 break; 6841 6842 case 0x33BC: 6843 // For case ignore, numeric, and stored prefix string matching rules, 6844 // characters are case folded per B.2 of [RFC3454] : U+33BC 6845 target[limit++] = 0x03BC; 6846 target[limit++] = 0x0077; 6847 break; 6848 6849 case 0x33BD: 6850 // For case ignore, numeric, and stored prefix string matching rules, 6851 // characters are case folded per B.2 of [RFC3454] : U+33BD 6852 target[limit++] = 0x006D; 6853 target[limit++] = 0x0077; 6854 break; 6855 6856 case 0x33BE: 6857 // For case ignore, numeric, and stored prefix string matching rules, 6858 // characters are case folded per B.2 of [RFC3454] : U+33BE 6859 target[limit++] = 0x006B; 6860 target[limit++] = 0x0077; 6861 break; 6862 6863 case 0x33BF: 6864 // For case ignore, numeric, and stored prefix string matching rules, 6865 // characters are case folded per B.2 of [RFC3454] : U+33BF 6866 target[limit++] = 0x006D; 6867 target[limit++] = 0x0077; 6868 break; 6869 6870 case 0x33C0: 6871 // For case ignore, numeric, and stored prefix string matching rules, 6872 // characters are case folded per B.2 of [RFC3454] : U+33C0 6873 target[limit++] = 0x006B; 6874 target[limit++] = 0x03C9; 6875 break; 6876 6877 case 0x33C1: 6878 // For case ignore, numeric, and stored prefix string matching rules, 6879 // characters are case folded per B.2 of [RFC3454] : U+33C1 6880 target[limit++] = 0x006D; 
6881 target[limit++] = 0x03C9; 6882 break; 6883 6884 case 0x33C3: 6885 // For case ignore, numeric, and stored prefix string matching rules, 6886 // characters are case folded per B.2 of [RFC3454] : U+33C3 6887 target[limit++] = 0x0062; 6888 target[limit++] = 0x0071; 6889 break; 6890 6891 case 0x33C6: 6892 // For case ignore, numeric, and stored prefix string matching rules, 6893 // characters are case folded per B.2 of [RFC3454] : U+33C6 6894 target[limit++] = 0x0063; 6895 target[limit++] = 0x2215; 6896 target[limit++] = 0x006B; 6897 target[limit++] = 0x0067; 6898 break; 6899 6900 case 0x33C7: 6901 // For case ignore, numeric, and stored prefix string matching rules, 6902 // characters are case folded per B.2 of [RFC3454] : U+33C7 6903 target[limit++] = 0x0063; 6904 target[limit++] = 0x006F; 6905 target[limit++] = 0x002E; 6906 break; 6907 6908 case 0x33C8: 6909 // For case ignore, numeric, and stored prefix string matching rules, 6910 // characters are case folded per B.2 of [RFC3454] : U+33C8 6911 target[limit++] = 0x0064; 6912 target[limit++] = 0x0062; 6913 break; 6914 6915 case 0x33C9: 6916 // For case ignore, numeric, and stored prefix string matching rules, 6917 // characters are case folded per B.2 of [RFC3454] : U+33C9 6918 target[limit++] = 0x0067; 6919 target[limit++] = 0x0079; 6920 break; 6921 6922 case 0x33CB: 6923 // For case ignore, numeric, and stored prefix string matching rules, 6924 // characters are case folded per B.2 of [RFC3454] : U+33CB 6925 target[limit++] = 0x0068; 6926 target[limit++] = 0x0070; 6927 break; 6928 6929 case 0x33CD: 6930 // For case ignore, numeric, and stored prefix string matching rules, 6931 // characters are case folded per B.2 of [RFC3454] : U+33CD 6932 target[limit++] = 0x006B; 6933 target[limit++] = 0x006B; 6934 break; 6935 6936 case 0x33CE: 6937 // For case ignore, numeric, and stored prefix string matching rules, 6938 // characters are case folded per B.2 of [RFC3454] : U+33CE 6939 target[limit++] = 0x006B; 6940 
target[limit++] = 0x006D; 6941 break; 6942 6943 case 0x33D7: 6944 // For case ignore, numeric, and stored prefix string matching rules, 6945 // characters are case folded per B.2 of [RFC3454] : U+33D7 6946 target[limit++] = 0x0070; 6947 target[limit++] = 0x0068; 6948 break; 6949 6950 case 0x33D9: 6951 // For case ignore, numeric, and stored prefix string matching rules, 6952 // characters are case folded per B.2 of [RFC3454] : U+33D9 6953 target[limit++] = 0x0070; 6954 target[limit++] = 0x0070; 6955 target[limit++] = 0x006D; 6956 break; 6957 6958 case 0x33DA: 6959 // For case ignore, numeric, and stored prefix string matching rules, 6960 // characters are case folded per B.2 of [RFC3454] : U+33DA 6961 target[limit++] = 0x0070; 6962 target[limit++] = 0x0072; 6963 break; 6964 6965 case 0x33DC: 6966 // For case ignore, numeric, and stored prefix string matching rules, 6967 // characters are case folded per B.2 of [RFC3454] : U+33DC 6968 target[limit++] = 0x0073; 6969 target[limit++] = 0x0076; 6970 break; 6971 6972 case 0x33DD: 6973 // For case ignore, numeric, and stored prefix string matching rules, 6974 // characters are case folded per B.2 of [RFC3454] : U+33DD 6975 target[limit++] = 0x0077; 6976 target[limit++] = 0x0062; 6977 break; 6978 6979 case 0xFB00: 6980 // For case ignore, numeric, and stored prefix string matching rules, 6981 // characters are case folded per B.2 of [RFC3454] : U+FB00 6982 target[limit++] = 0x0066; 6983 target[limit++] = 0x0066; 6984 break; 6985 6986 case 0xFB01: 6987 // For case ignore, numeric, and stored prefix string matching rules, 6988 // characters are case folded per B.2 of [RFC3454] : U+FB01 6989 target[limit++] = 0x0066; 6990 target[limit++] = 0x0069; 6991 break; 6992 6993 case 0xFB02: 6994 // For case ignore, numeric, and stored prefix string matching rules, 6995 // characters are case folded per B.2 of [RFC3454] : U+FB02 6996 target[limit++] = 0x0066; 6997 target[limit++] = 0x006C; 6998 break; 6999 7000 case 0xFB03: 7001 // For 
case ignore, numeric, and stored prefix string matching rules, 7002 // characters are case folded per B.2 of [RFC3454] : U+FB03 7003 target[limit++] = 0x0066; 7004 target[limit++] = 0x0066; 7005 target[limit++] = 0x0069; 7006 break; 7007 7008 case 0xFB04: 7009 // For case ignore, numeric, and stored prefix string matching rules, 7010 // characters are case folded per B.2 of [RFC3454] : U+FB04 7011 target[limit++] = 0x0066; 7012 target[limit++] = 0x0066; 7013 target[limit++] = 0x006C; 7014 break; 7015 7016 case 0xFB05: 7017 // For case ignore, numeric, and stored prefix string matching rules, 7018 // characters are case folded per B.2 of [RFC3454] : U+FB05 7019 target[limit++] = 0x0073; 7020 target[limit++] = 0x0074; 7021 break; 7022 7023 case 0xFB06: 7024 // For case ignore, numeric, and stored prefix string matching rules, 7025 // characters are case folded per B.2 of [RFC3454] : U+FB06 7026 target[limit++] = 0x0073; 7027 target[limit++] = 0x0074; 7028 break; 7029 7030 case 0xFB13: 7031 // For case ignore, numeric, and stored prefix string matching rules, 7032 // characters are case folded per B.2 of [RFC3454] : U+FB13 7033 target[limit++] = 0x0574; 7034 target[limit++] = 0x0576; 7035 break; 7036 7037 case 0xFB14: 7038 // For case ignore, numeric, and stored prefix string matching rules, 7039 // characters are case folded per B.2 of [RFC3454] : U+FB14 7040 target[limit++] = 0x0574; 7041 target[limit++] = 0x0565; 7042 break; 7043 7044 case 0xFB15: 7045 // For case ignore, numeric, and stored prefix string matching rules, 7046 // characters are case folded per B.2 of [RFC3454] : U+FB15 7047 target[limit++] = 0x0574; 7048 target[limit++] = 0x056B; 7049 break; 7050 7051 case 0xFB16: 7052 // For case ignore, numeric, and stored prefix string matching rules, 7053 // characters are case folded per B.2 of [RFC3454] : U+FB16 7054 target[limit++] = 0x057E; 7055 target[limit++] = 0x0576; 7056 break; 7057 7058 case 0xFB17: 7059 // For case ignore, numeric, and stored prefix 
string matching rules, 7060 // characters are case folded per B.2 of [RFC3454] : U+FB17 7061 target[limit++] = 0x0574; 7062 target[limit++] = 0x056D; 7063 break; 7064 7065 case 0xFE00: 7066 case 0xFE01: 7067 case 0xFE02: 7068 case 0xFE03: 7069 case 0xFE04: 7070 case 0xFE05: 7071 case 0xFE06: 7072 case 0xFE07: 7073 case 0xFE08: 7074 case 0xFE09: 7075 case 0xFE0A: 7076 case 0xFE0B: 7077 case 0xFE0C: 7078 case 0xFE0D: 7079 case 0xFE0E: 7080 case 0xFE0F: 7081 // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 7082 // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and 7083 // VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also 7084 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 7085 // mapped to nothing. 7086 break; 7087 7088 case 0xFEFF: 7089 // All other control code (e.g., Cc) points or code points with a 7090 // control function (e.g., Cf) are mapped to nothing. The following is 7091 // a complete list of these code points: ... U+FEFF... 7092 break; 7093 7094 case 0xFF21: 7095 case 0xFF22: 7096 case 0xFF23: 7097 case 0xFF24: 7098 case 0xFF25: 7099 case 0xFF26: 7100 case 0xFF27: 7101 case 0xFF28: 7102 case 0xFF29: 7103 case 0xFF2A: 7104 case 0xFF2B: 7105 case 0xFF2C: 7106 case 0xFF2D: 7107 case 0xFF2E: 7108 case 0xFF2F: 7109 case 0xFF30: 7110 case 0xFF31: 7111 case 0xFF32: 7112 case 0xFF33: 7113 case 0xFF34: 7114 case 0xFF35: 7115 case 0xFF36: 7116 case 0xFF37: 7117 case 0xFF38: 7118 case 0xFF39: 7119 case 0xFF3A: 7120 // For case ignore, numeric, and stored prefix string matching rules, 7121 // characters are case folded per B.2 of [RFC3454] : U+FF21-FF3A 7122 target[limit++] = ( char ) ( c + 0x0020 ); 7123 break; 7124 7125 case 0xFFF9: 7126 case 0xFFFA: 7127 case 0xFFFB: 7128 // All other control code (e.g., Cc) points or code points with a 7129 // control function (e.g., Cf) are mapped to nothing. The following is 7130 // a complete list of these code points: ... U+FFF9-FFFB... 
7131 break; 7132 7133 case 0xFFFC: 7134 // SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code 7135 // points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and 7136 // VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also 7137 // mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is 7138 // mapped to nothing. 7139 break; 7140 7141 default: 7142 // First, eliminate surrogates, and replace them by FFFD char 7143 if ( ( c >= 0xD800 ) && ( c <= 0xDFFF ) ) 7144 { 7145 target[limit++] = 0xFFFD; 7146 break; 7147 } 7148 7149 target[limit++] = c; 7150 break; 7151 } 7152 } 7153 7154 return new String( target, 0, limit ); 7155 } 7156}