1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
21 #include "striconveh.h"
33 /* Magic number for detecting bounds violations. */
34 #define MAGIC 0x1983EFF1
37 new_offsets (size_t n
)
39 size_t *offsets
= (size_t *) malloc ((n
+ 1) * sizeof (size_t));
48 static enum iconv_ilseq_handler handlers
[] =
51 iconveh_question_mark
,
52 iconveh_replacement_character
,
53 iconveh_escape_sequence
60 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
61 ISO-8859-2, UTF-8, and with libiconv or glibc also GB18030. */
62 iconv_t cd_ascii_to_88591
= iconv_open ("ISO-8859-1", "ASCII");
63 iconv_t cd_88591_to_88592
= iconv_open ("ISO-8859-2", "ISO-8859-1");
64 iconv_t cd_88592_to_88591
= iconv_open ("ISO-8859-1", "ISO-8859-2");
65 iconv_t cd_ascii_to_utf8
= iconv_open ("UTF-8", "ASCII");
66 iconv_t cd_88591_to_utf8
= iconv_open ("UTF-8", "ISO-8859-1");
67 iconv_t cd_utf8_to_88591
= iconv_open ("ISO-8859-1", "UTF-8");
68 iconv_t cd_88592_to_utf8
= iconv_open ("UTF-8", "ISO-8859-2");
69 iconv_t cd_utf8_to_88592
= iconv_open ("ISO-8859-2", "UTF-8");
70 iconv_t cd_utf7_to_utf8
= iconv_open ("UTF-8", "UTF-7");
71 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
72 iconv_t cd_ascii_to_gb18030
= iconv_open ("GB18030", "ASCII");
73 iconv_t cd_utf8_to_gb18030
= iconv_open ("GB18030", "UTF-8");
74 iconv_t cd_88591_to_gb18030
= iconv_open ("GB18030", "ISO-8859-1");
75 iconv_t cd_utf7_to_gb18030
= iconv_open ("GB18030", "UTF-7");
77 iconveh_t cdeh_ascii_to_88591
;
78 iconveh_t cdeh_ascii_to_88591_indirectly
;
79 iconveh_t cdeh_88592_to_88591
;
80 iconveh_t cdeh_88592_to_88591_indirectly
;
81 iconveh_t cdeh_ascii_to_utf8
;
82 iconveh_t cdeh_88591_to_utf8
;
83 iconveh_t cdeh_utf8_to_88591
;
84 iconveh_t cdeh_utf7_to_utf8
;
85 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
86 iconveh_t cdeh_ascii_to_gb18030
;
87 iconveh_t cdeh_88591_to_gb18030
;
88 iconveh_t cdeh_utf7_to_gb18030
;
91 ASSERT (cd_ascii_to_utf8
!= (iconv_t
)(-1));
92 ASSERT (cd_88591_to_utf8
!= (iconv_t
)(-1));
93 ASSERT (cd_utf8_to_88591
!= (iconv_t
)(-1));
94 ASSERT (cd_88592_to_utf8
!= (iconv_t
)(-1));
95 ASSERT (cd_utf8_to_88592
!= (iconv_t
)(-1));
96 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
97 ASSERT (cd_ascii_to_gb18030
!= (iconv_t
)(-1));
98 ASSERT (cd_utf8_to_gb18030
!= (iconv_t
)(-1));
101 cdeh_ascii_to_88591
.cd
= cd_ascii_to_88591
;
102 cdeh_ascii_to_88591
.cd1
= cd_ascii_to_utf8
;
103 cdeh_ascii_to_88591
.cd2
= cd_utf8_to_88591
;
105 cdeh_ascii_to_88591_indirectly
.cd
= (iconv_t
)(-1);
106 cdeh_ascii_to_88591_indirectly
.cd1
= cd_ascii_to_utf8
;
107 cdeh_ascii_to_88591_indirectly
.cd2
= cd_utf8_to_88591
;
109 cdeh_88592_to_88591
.cd
= cd_88592_to_88591
;
110 cdeh_88592_to_88591
.cd1
= cd_88592_to_utf8
;
111 cdeh_88592_to_88591
.cd2
= cd_utf8_to_88591
;
113 cdeh_88592_to_88591_indirectly
.cd
= (iconv_t
)(-1);
114 cdeh_88592_to_88591_indirectly
.cd1
= cd_88592_to_utf8
;
115 cdeh_88592_to_88591_indirectly
.cd2
= cd_utf8_to_88591
;
117 cdeh_ascii_to_utf8
.cd
= cd_ascii_to_utf8
;
118 cdeh_ascii_to_utf8
.cd1
= cd_ascii_to_utf8
;
119 cdeh_ascii_to_utf8
.cd2
= (iconv_t
)(-1);
121 cdeh_88591_to_utf8
.cd
= cd_88591_to_utf8
;
122 cdeh_88591_to_utf8
.cd1
= cd_88591_to_utf8
;
123 cdeh_88591_to_utf8
.cd2
= (iconv_t
)(-1);
125 cdeh_utf8_to_88591
.cd
= cd_utf8_to_88591
;
126 cdeh_utf8_to_88591
.cd1
= (iconv_t
)(-1);
127 cdeh_utf8_to_88591
.cd2
= cd_utf8_to_88591
;
129 cdeh_utf7_to_utf8
.cd
= cd_utf7_to_utf8
;
130 cdeh_utf7_to_utf8
.cd1
= cd_utf7_to_utf8
;
131 cdeh_utf7_to_utf8
.cd2
= (iconv_t
)(-1);
133 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
134 cdeh_ascii_to_gb18030
.cd
= cd_ascii_to_gb18030
;
135 cdeh_ascii_to_gb18030
.cd1
= cd_ascii_to_utf8
;
136 cdeh_ascii_to_gb18030
.cd2
= cd_utf8_to_gb18030
;
138 cdeh_88591_to_gb18030
.cd
= cd_88591_to_gb18030
;
139 cdeh_88591_to_gb18030
.cd1
= cd_88591_to_utf8
;
140 cdeh_88591_to_gb18030
.cd2
= cd_utf8_to_gb18030
;
142 cdeh_utf7_to_gb18030
.cd
= cd_utf7_to_gb18030
;
143 cdeh_utf7_to_gb18030
.cd1
= cd_utf7_to_utf8
;
144 cdeh_utf7_to_gb18030
.cd2
= cd_utf8_to_gb18030
;
147 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
149 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
150 for (indirect
= 0; indirect
<= 1; indirect
++)
152 for (h
= 0; h
< SIZEOF (handlers
); h
++)
154 enum iconv_ilseq_handler handler
= handlers
[h
];
155 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
156 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
157 for (o
= 0; o
< 2; o
++)
159 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
162 int retval
= mem_cd_iconveh (input
, strlen (input
),
164 ? &cdeh_88592_to_88591_indirectly
165 : &cdeh_88592_to_88591
),
169 ASSERT (retval
== 0);
170 ASSERT (length
== strlen (expected
));
171 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
174 for (i
= 0; i
< 37; i
++)
175 ASSERT (offsets
[i
] == i
);
176 ASSERT (offsets
[37] == MAGIC
);
184 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
185 for (indirect
= 0; indirect
<= 1; indirect
++)
187 for (h
= 0; h
< SIZEOF (handlers
); h
++)
189 enum iconv_ilseq_handler handler
= handlers
[h
];
190 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
191 for (o
= 0; o
< 2; o
++)
193 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
196 int retval
= mem_cd_iconveh (input
, strlen (input
),
198 ? &cdeh_ascii_to_88591_indirectly
199 : &cdeh_ascii_to_88591
),
206 ASSERT (retval
== -1 && errno
== EILSEQ
);
207 ASSERT (result
== NULL
);
211 case iconveh_question_mark
:
212 case iconveh_replacement_character
:
213 case iconveh_escape_sequence
:
215 static const char expected
[] = "Rafa? Maszkowski";
216 ASSERT (retval
== 0);
217 ASSERT (length
== strlen (expected
));
218 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
221 for (i
= 0; i
< 16; i
++)
222 ASSERT (offsets
[i
] == i
);
223 ASSERT (offsets
[16] == MAGIC
);
234 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
235 for (indirect
= 0; indirect
<= 1; indirect
++)
237 for (h
= 0; h
< SIZEOF (handlers
); h
++)
239 enum iconv_ilseq_handler handler
= handlers
[h
];
240 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
241 for (o
= 0; o
< 2; o
++)
243 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
246 int retval
= mem_cd_iconveh (input
, strlen (input
),
248 ? &cdeh_88592_to_88591_indirectly
249 : &cdeh_88592_to_88591
),
256 ASSERT (retval
== -1 && errno
== EILSEQ
);
257 ASSERT (result
== NULL
);
261 case iconveh_question_mark
:
262 case iconveh_replacement_character
:
264 static const char expected
[] = "Rafa? Maszkowski";
265 ASSERT (retval
== 0);
266 ASSERT (length
== strlen (expected
));
267 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
270 for (i
= 0; i
< 16; i
++)
271 ASSERT (offsets
[i
] == i
);
272 ASSERT (offsets
[16] == MAGIC
);
278 case iconveh_escape_sequence
:
280 static const char expected
[] = "Rafa\\u0142 Maszkowski";
281 ASSERT (retval
== 0);
282 ASSERT (length
== strlen (expected
));
283 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
286 for (i
= 0; i
< 16; i
++)
287 ASSERT (offsets
[i
] == (i
< 5 ? i
:
289 ASSERT (offsets
[16] == MAGIC
);
300 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
301 for (h
= 0; h
< SIZEOF (handlers
); h
++)
303 enum iconv_ilseq_handler handler
= handlers
[h
];
304 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
305 static const char expected
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
306 for (o
= 0; o
< 2; o
++)
308 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
311 int retval
= mem_cd_iconveh (input
, strlen (input
),
316 ASSERT (retval
== 0);
317 ASSERT (length
== strlen (expected
));
318 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
321 for (i
= 0; i
< 37; i
++)
322 ASSERT (offsets
[i
] == (i
< 1 ? i
:
326 ASSERT (offsets
[37] == MAGIC
);
333 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
334 /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
335 for (h
= 0; h
< SIZEOF (handlers
); h
++)
337 enum iconv_ilseq_handler handler
= handlers
[h
];
338 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
339 static const char expected
[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
340 for (o
= 0; o
< 2; o
++)
342 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
345 int retval
= mem_cd_iconveh (input
, strlen (input
),
346 &cdeh_88591_to_gb18030
,
350 ASSERT (retval
== 0);
351 ASSERT (length
== strlen (expected
));
352 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
355 for (i
= 0; i
< 37; i
++)
356 ASSERT (offsets
[i
] == (i
< 1 ? i
:
360 ASSERT (offsets
[37] == MAGIC
);
368 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
369 for (h
= 0; h
< SIZEOF (handlers
); h
++)
371 enum iconv_ilseq_handler handler
= handlers
[h
];
372 static const char input
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
373 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
374 for (o
= 0; o
< 2; o
++)
376 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
379 int retval
= mem_cd_iconveh (input
, strlen (input
),
384 ASSERT (retval
== 0);
385 ASSERT (length
== strlen (expected
));
386 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
389 for (i
= 0; i
< 41; i
++)
390 ASSERT (offsets
[i
] == (i
< 1 ? i
:
391 i
== 1 ? (size_t)(-1) :
393 i
== 13 ? (size_t)(-1) :
395 i
== 20 ? (size_t)(-1) :
398 ASSERT (offsets
[41] == MAGIC
);
405 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
406 for (h
= 0; h
< SIZEOF (handlers
); h
++)
408 enum iconv_ilseq_handler handler
= handlers
[h
];
409 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
410 for (o
= 0; o
< 2; o
++)
412 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
415 int retval
= mem_cd_iconveh (input
, strlen (input
),
423 ASSERT (retval
== -1 && errno
== EILSEQ
);
424 ASSERT (result
== NULL
);
428 case iconveh_question_mark
:
429 case iconveh_escape_sequence
:
431 static const char expected
[] = "Rafa? Maszkowski";
432 ASSERT (retval
== 0);
433 ASSERT (length
== strlen (expected
));
434 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
437 for (i
= 0; i
< 16; i
++)
438 ASSERT (offsets
[i
] == i
);
439 ASSERT (offsets
[16] == MAGIC
);
445 case iconveh_replacement_character
:
447 static const char expected
[] = "Rafa\357\277\275 Maszkowski";
448 ASSERT (retval
== 0);
449 ASSERT (length
== strlen (expected
));
450 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
453 for (i
= 0; i
< 16; i
++)
454 ASSERT (offsets
[i
] == (i
< 5 ? i
: i
+ 2));
455 ASSERT (offsets
[16] == MAGIC
);
465 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || ((__GLIBC__ + (__GLIBC_MINOR__ >= 16) > 2) && !defined __UCLIBC__)
466 /* Test conversion from ASCII to GB18030 with invalid input (EILSEQ).
467 Note: glibc's GB18030 converter was buggy in glibc-2.15; fixed by
468 Andreas Schwab on 2012-02-06. */
469 for (h
= 0; h
< SIZEOF (handlers
); h
++)
471 enum iconv_ilseq_handler handler
= handlers
[h
];
472 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
473 for (o
= 0; o
< 2; o
++)
475 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
478 int retval
= mem_cd_iconveh (input
, strlen (input
),
479 &cdeh_ascii_to_gb18030
,
486 ASSERT (retval
== -1 && errno
== EILSEQ
);
487 ASSERT (result
== NULL
);
491 case iconveh_question_mark
:
492 case iconveh_escape_sequence
:
494 static const char expected
[] = "Rafa? Maszkowski";
495 ASSERT (retval
== 0);
496 ASSERT (length
== strlen (expected
));
497 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
500 for (i
= 0; i
< 16; i
++)
501 ASSERT (offsets
[i
] == i
);
502 ASSERT (offsets
[16] == MAGIC
);
508 case iconveh_replacement_character
:
510 static const char expected
[] = "Rafa\2041\2447 Maszkowski";
511 ASSERT (retval
== 0);
512 ASSERT (length
== strlen (expected
));
513 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
516 for (i
= 0; i
< 16; i
++)
517 ASSERT (offsets
[i
] == (i
< 5 ? i
: i
+ 3));
518 ASSERT (offsets
[16] == MAGIC
);
529 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
530 for (h
= 0; h
< SIZEOF (handlers
); h
++)
532 enum iconv_ilseq_handler handler
= handlers
[h
];
533 static const char input
[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
534 for (o
= 0; o
< 2; o
++)
536 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
539 int retval
= mem_cd_iconveh (input
, strlen (input
),
547 ASSERT (retval
== -1 && errno
== EILSEQ
);
548 ASSERT (result
== NULL
);
552 case iconveh_question_mark
:
553 case iconveh_replacement_character
:
555 static const char expected
[] = "Rafa? Maszkowski";
556 ASSERT (retval
== 0);
557 ASSERT (length
== strlen (expected
));
558 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
561 for (i
= 0; i
< 17; i
++)
562 ASSERT (offsets
[i
] == (i
< 5 ? i
:
563 i
== 5 ? (size_t)(-1) :
565 ASSERT (offsets
[17] == MAGIC
);
571 case iconveh_escape_sequence
:
573 static const char expected
[] = "Rafa\\u0142 Maszkowski";
574 ASSERT (retval
== 0);
575 ASSERT (length
== strlen (expected
));
576 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
579 for (i
= 0; i
< 17; i
++)
580 ASSERT (offsets
[i
] == (i
< 5 ? i
:
581 i
== 5 ? (size_t)(-1) :
583 ASSERT (offsets
[17] == MAGIC
);
593 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
594 for (h
= 0; h
< SIZEOF (handlers
); h
++)
596 enum iconv_ilseq_handler handler
= handlers
[h
];
597 static const char input
[] = "\342";
598 for (o
= 0; o
< 2; o
++)
600 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
603 int retval
= mem_cd_iconveh (input
, strlen (input
),
608 ASSERT (retval
== 0);
609 ASSERT (length
== 0);
612 ASSERT (offsets
[0] == 0);
613 ASSERT (offsets
[1] == MAGIC
);
620 if (cd_utf7_to_utf8
!= (iconv_t
)(-1))
622 /* Disabled on Solaris, because Solaris 9 iconv() is buggy: it returns
623 -1 / EILSEQ when converting the 7th byte of the input "+VDLYP9hA". */
624 # if !(defined __sun && !defined _LIBICONV_VERSION)
625 /* Test conversion from UTF-7 to UTF-8 with EINVAL. */
626 for (h
= 0; h
< SIZEOF (handlers
); h
++)
628 enum iconv_ilseq_handler handler
= handlers
[h
];
629 /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40. It would
630 convert to U+5432 U+D83F U+D840 but these are Unicode surrogates. */
631 static const char input
[] = "+VDLYP9hA";
632 static const char expected1
[] = "\345\220\262"; /* 吲 glibc */
633 static const char expected2
[] = ""; /* libiconv */
636 int retval
= mem_cd_iconveh (input
, 7,
641 ASSERT (retval
== 0);
642 ASSERT (length
== strlen (expected1
) || length
== strlen (expected2
));
643 ASSERT (result
!= NULL
);
644 if (length
== strlen (expected1
))
645 ASSERT (memcmp (result
, expected1
, strlen (expected1
)) == 0);
647 ASSERT (memcmp (result
, expected2
, strlen (expected2
)) == 0);
651 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
652 /* Test conversion from UTF-7 to GB18030 with EINVAL. */
653 for (h
= 0; h
< SIZEOF (handlers
); h
++)
655 enum iconv_ilseq_handler handler
= handlers
[h
];
656 /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40. It would
657 convert to U+5432 U+D83F U+D840 but these are Unicode surrogates. */
658 static const char input
[] = "+VDLYP9hA";
659 static const char expected1
[] = "\337\305"; /* 吲 glibc */
660 static const char expected2
[] = ""; /* libiconv */
663 int retval
= mem_cd_iconveh (input
, 7,
664 &cdeh_utf7_to_gb18030
,
668 ASSERT (retval
== 0);
669 ASSERT (length
== strlen (expected1
) || length
== strlen (expected2
));
670 ASSERT (result
!= NULL
);
671 if (length
== strlen (expected1
))
672 ASSERT (memcmp (result
, expected1
, strlen (expected1
)) == 0);
674 ASSERT (memcmp (result
, expected2
, strlen (expected2
)) == 0);
679 /* Disabled on NetBSD, because NetBSD 5.0 iconv() is buggy: it converts
680 the input "+2D/YQNhB" to U+1FED8 U+3FD8 U+40D8. */
681 # if !(defined __NetBSD__ && !defined _LIBICONV_VERSION)
682 /* Test conversion from UTF-7 to UTF-8 with EILSEQ. */
683 for (h
= 0; h
< SIZEOF (handlers
); h
++)
685 enum iconv_ilseq_handler handler
= handlers
[h
];
686 /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41. It would
687 convert to U+D83F U+D840 U+D841 but these are Unicode surrogates. */
688 static const char input
[] = "+2D/YQNhB";
691 int retval
= mem_cd_iconveh (input
, strlen (input
),
699 ASSERT (retval
== -1 && errno
== EILSEQ
);
700 ASSERT (result
== NULL
);
702 case iconveh_question_mark
:
703 case iconveh_escape_sequence
:
706 static const char expected1
[] = "?????";
707 /* libiconv <= 1.12 result */
708 static const char expected2
[] = "?2D/YQNhB";
709 /* libiconv behaviour changed in version 1.13: the result is
710 '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left by 6 bits. */
712 static const char expected3
[] = "?\340\277\266\341\200\266";
713 ASSERT (retval
== 0);
714 ASSERT (length
== strlen (expected1
)
715 || length
== strlen (expected2
)
716 || length
== strlen (expected3
));
717 ASSERT (result
!= NULL
);
718 if (length
== strlen (expected1
))
719 ASSERT (memcmp (result
, expected1
, strlen (expected1
)) == 0);
720 else if (length
== strlen (expected2
))
721 ASSERT (memcmp (result
, expected2
, strlen (expected2
)) == 0);
723 ASSERT (memcmp (result
, expected3
, strlen (expected3
)) == 0);
727 case iconveh_replacement_character
:
730 static const char expected1
[] = "\357\277\275\357\277\275\357\277\275\357\277\275\357\277\275";
731 /* libiconv <= 1.12 result */
732 static const char expected2
[] = "\357\277\2752D/YQNhB";
733 /* libiconv >= 1.13 result */
734 static const char expected3
[] = "\357\277\275\340\277\266\341\200\266";
735 ASSERT (retval
== 0);
736 ASSERT (length
== strlen (expected1
)
737 || length
== strlen (expected2
)
738 || length
== strlen (expected3
));
739 ASSERT (result
!= NULL
);
740 if (length
== strlen (expected1
))
741 ASSERT (memcmp (result
, expected1
, strlen (expected1
)) == 0);
742 else if (length
== strlen (expected2
))
743 ASSERT (memcmp (result
, expected2
, strlen (expected2
)) == 0);
745 ASSERT (memcmp (result
, expected3
, strlen (expected3
)) == 0);
751 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || ((__GLIBC__ + (__GLIBC_MINOR__ >= 16) > 2) && !defined __UCLIBC__)
752 /* Test conversion from UTF-7 to GB18030 with EILSEQ.
753 Note: glibc's GB18030 converter was buggy in glibc-2.15; fixed by
754 Andreas Schwab on 2012-02-06. */
755 for (h
= 0; h
< SIZEOF (handlers
); h
++)
757 enum iconv_ilseq_handler handler
= handlers
[h
];
758 /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41. It would
759 convert to U+D83F U+D840 U+D841 but these are Unicode surrogates. */
760 static const char input
[] = "+2D/YQNhB";
763 int retval
= mem_cd_iconveh (input
, strlen (input
),
764 &cdeh_utf7_to_gb18030
,
771 ASSERT (retval
== -1 && errno
== EILSEQ
);
772 ASSERT (result
== NULL
);
774 case iconveh_question_mark
:
775 case iconveh_escape_sequence
:
778 static const char expected1
[] = "?????";
779 /* libiconv <= 1.12 result */
780 static const char expected2
[] = "?2D/YQNhB";
781 /* libiconv behaviour changed in version 1.13: the result is
782 '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left by 6 bits. */
784 static const char expected3
[] = "?\2013\2030\2013\2114";
785 ASSERT (retval
== 0);
786 ASSERT (length
== strlen (expected1
)
787 || length
== strlen (expected2
)
788 || length
== strlen (expected3
));
789 ASSERT (result
!= NULL
);
790 if (length
== strlen (expected1
))
791 ASSERT (memcmp (result
, expected1
, strlen (expected1
)) == 0);
792 else if (length
== strlen (expected2
))
793 ASSERT (memcmp (result
, expected2
, strlen (expected2
)) == 0
794 || memcmp (result
, expected3
, strlen (expected3
)) == 0);
798 case iconveh_replacement_character
:
801 static const char expected1
[] = "\2041\2447\2041\2447\2041\2447\2041\2447\2041\2447";
802 /* libiconv <= 1.12 result */
803 static const char expected2
[] = "\2041\24472D/YQNhB";
804 /* libiconv >= 1.13 result */
805 static const char expected3
[] = "\2041\2447\2013\2030\2013\2114";
806 ASSERT (retval
== 0);
807 ASSERT (length
== strlen (expected1
)
808 || length
== strlen (expected2
)
809 || length
== strlen (expected3
));
810 ASSERT (result
!= NULL
);
811 if (length
== strlen (expected1
))
812 ASSERT (memcmp (result
, expected1
, strlen (expected1
)) == 0);
813 else if (length
== strlen (expected2
))
814 ASSERT (memcmp (result
, expected2
, strlen (expected2
)) == 0
815 || memcmp (result
, expected3
, strlen (expected3
)) == 0);
825 /* ------------------------ Test str_cd_iconveh() ------------------------ */
827 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
828 for (indirect
= 0; indirect
<= 1; indirect
++)
830 for (h
= 0; h
< SIZEOF (handlers
); h
++)
832 enum iconv_ilseq_handler handler
= handlers
[h
];
833 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
834 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
835 char *result
= str_cd_iconveh (input
,
837 ? &cdeh_88592_to_88591_indirectly
838 : &cdeh_88592_to_88591
),
840 ASSERT (result
!= NULL
);
841 ASSERT (strcmp (result
, expected
) == 0);
846 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
847 for (indirect
= 0; indirect
<= 1; indirect
++)
849 for (h
= 0; h
< SIZEOF (handlers
); h
++)
851 enum iconv_ilseq_handler handler
= handlers
[h
];
852 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
853 char *result
= str_cd_iconveh (input
,
855 ? &cdeh_ascii_to_88591_indirectly
856 : &cdeh_ascii_to_88591
),
861 ASSERT (result
== NULL
&& errno
== EILSEQ
);
863 case iconveh_question_mark
:
864 case iconveh_replacement_character
:
865 case iconveh_escape_sequence
:
867 static const char expected
[] = "Rafa? Maszkowski";
868 ASSERT (result
!= NULL
);
869 ASSERT (strcmp (result
, expected
) == 0);
877 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
878 for (indirect
= 0; indirect
<= 1; indirect
++)
880 for (h
= 0; h
< SIZEOF (handlers
); h
++)
882 enum iconv_ilseq_handler handler
= handlers
[h
];
883 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
884 char *result
= str_cd_iconveh (input
,
886 ? &cdeh_88592_to_88591_indirectly
887 : &cdeh_88592_to_88591
),
892 ASSERT (result
== NULL
&& errno
== EILSEQ
);
894 case iconveh_question_mark
:
895 case iconveh_replacement_character
:
897 static const char expected
[] = "Rafa? Maszkowski";
898 ASSERT (result
!= NULL
);
899 ASSERT (strcmp (result
, expected
) == 0);
903 case iconveh_escape_sequence
:
905 static const char expected
[] = "Rafa\\u0142 Maszkowski";
906 ASSERT (result
!= NULL
);
907 ASSERT (strcmp (result
, expected
) == 0);
915 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
916 for (h
= 0; h
< SIZEOF (handlers
); h
++)
918 enum iconv_ilseq_handler handler
= handlers
[h
];
919 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
920 static const char expected
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
921 char *result
= str_cd_iconveh (input
,
924 ASSERT (result
!= NULL
);
925 ASSERT (strcmp (result
, expected
) == 0);
929 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
930 /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
931 for (h
= 0; h
< SIZEOF (handlers
); h
++)
933 enum iconv_ilseq_handler handler
= handlers
[h
];
934 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
935 static const char expected
[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
936 char *result
= str_cd_iconveh (input
,
937 &cdeh_88591_to_gb18030
,
939 ASSERT (result
!= NULL
);
940 ASSERT (strcmp (result
, expected
) == 0);
945 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
946 for (h
= 0; h
< SIZEOF (handlers
); h
++)
948 enum iconv_ilseq_handler handler
= handlers
[h
];
949 static const char input
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
950 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
951 char *result
= str_cd_iconveh (input
,
954 ASSERT (result
!= NULL
);
955 ASSERT (strcmp (result
, expected
) == 0);
959 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
960 for (h
= 0; h
< SIZEOF (handlers
); h
++)
962 enum iconv_ilseq_handler handler
= handlers
[h
];
963 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
964 char *result
= str_cd_iconveh (input
,
970 ASSERT (result
== NULL
&& errno
== EILSEQ
);
972 case iconveh_question_mark
:
973 case iconveh_escape_sequence
:
975 static const char expected
[] = "Rafa? Maszkowski";
976 ASSERT (result
!= NULL
);
977 ASSERT (strcmp (result
, expected
) == 0);
981 case iconveh_replacement_character
:
983 static const char expected
[] = "Rafa\357\277\275 Maszkowski";
984 ASSERT (result
!= NULL
);
985 ASSERT (strcmp (result
, expected
) == 0);
992 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || ((__GLIBC__ + (__GLIBC_MINOR__ >= 16) > 2) && !defined __UCLIBC__)
993 /* Test conversion from ASCII to GB18030 with invalid input (EILSEQ).
994 Note: glibc's GB18030 converter was buggy in glibc-2.15; fixed by
995 Andreas Schwab on 2012-02-06. */
996 for (h
= 0; h
< SIZEOF (handlers
); h
++)
998 enum iconv_ilseq_handler handler
= handlers
[h
];
999 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
1000 char *result
= str_cd_iconveh (input
,
1001 &cdeh_ascii_to_gb18030
,
1006 ASSERT (result
== NULL
&& errno
== EILSEQ
);
1008 case iconveh_question_mark
:
1009 case iconveh_escape_sequence
:
1011 static const char expected
[] = "Rafa? Maszkowski";
1012 ASSERT (result
!= NULL
);
1013 ASSERT (strcmp (result
, expected
) == 0);
1017 case iconveh_replacement_character
:
1019 static const char expected
[] = "Rafa\2041\2447 Maszkowski";
1020 ASSERT (result
!= NULL
);
1021 ASSERT (strcmp (result
, expected
) == 0);
1029 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
1030 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1032 enum iconv_ilseq_handler handler
= handlers
[h
];
1033 static const char input
[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
1034 char *result
= str_cd_iconveh (input
,
1035 &cdeh_utf8_to_88591
,
1040 ASSERT (result
== NULL
&& errno
== EILSEQ
);
1042 case iconveh_question_mark
:
1043 case iconveh_replacement_character
:
1045 static const char expected
[] = "Costs: 27 ?";
1046 ASSERT (result
!= NULL
);
1047 ASSERT (strcmp (result
, expected
) == 0);
1051 case iconveh_escape_sequence
:
1053 static const char expected
[] = "Costs: 27 \\u20AC";
1054 ASSERT (result
!= NULL
);
1055 ASSERT (strcmp (result
, expected
) == 0);
1062 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
1063 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1065 enum iconv_ilseq_handler handler
= handlers
[h
];
1066 static const char input
[] = "\342";
1067 char *result
= str_cd_iconveh (input
,
1068 &cdeh_utf8_to_88591
,
1070 ASSERT (result
!= NULL
);
1071 ASSERT (strcmp (result
, "") == 0);
1075 if (cd_88591_to_88592
!= (iconv_t
)(-1))
1076 iconv_close (cd_88591_to_88592
);
1077 if (cd_88592_to_88591
!= (iconv_t
)(-1))
1078 iconv_close (cd_88592_to_88591
);
1079 iconv_close (cd_88591_to_utf8
);
1080 iconv_close (cd_utf8_to_88591
);
1081 iconv_close (cd_88592_to_utf8
);
1082 iconv_close (cd_utf8_to_88592
);
1084 /* ------------------------- Test mem_iconveh() ------------------------- */
1086 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
1087 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1089 enum iconv_ilseq_handler handler
= handlers
[h
];
1090 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1091 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1092 for (o
= 0; o
< 2; o
++)
1094 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
1095 char *result
= NULL
;
1097 int retval
= mem_iconveh (input
, strlen (input
),
1098 "ISO-8859-2", "ISO-8859-1",
1102 ASSERT (retval
== 0);
1103 ASSERT (length
== strlen (expected
));
1104 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
1107 for (i
= 0; i
< 37; i
++)
1108 ASSERT (offsets
[i
] == i
);
1109 ASSERT (offsets
[37] == MAGIC
);
1116 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
1117 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1119 enum iconv_ilseq_handler handler
= handlers
[h
];
1120 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
1121 for (o
= 0; o
< 2; o
++)
1123 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
1124 char *result
= NULL
;
1126 int retval
= mem_iconveh (input
, strlen (input
),
1127 "ISO-8859-2", "ISO-8859-1",
1134 ASSERT (retval
== -1 && errno
== EILSEQ
);
1135 ASSERT (result
== NULL
);
1139 case iconveh_question_mark
:
1140 case iconveh_replacement_character
:
1142 static const char expected
[] = "Rafa? Maszkowski";
1143 ASSERT (retval
== 0);
1144 ASSERT (length
== strlen (expected
));
1145 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
1148 for (i
= 0; i
< 16; i
++)
1149 ASSERT (offsets
[i
] == i
);
1150 ASSERT (offsets
[16] == MAGIC
);
1156 case iconveh_escape_sequence
:
1158 static const char expected
[] = "Rafa\\u0142 Maszkowski";
1159 ASSERT (retval
== 0);
1160 ASSERT (length
== strlen (expected
));
1161 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
1164 for (i
= 0; i
< 16; i
++)
1165 ASSERT (offsets
[i
] == (i
< 5 ? i
:
1167 ASSERT (offsets
[16] == MAGIC
);
1177 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
1178 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1180 enum iconv_ilseq_handler handler
= handlers
[h
];
1181 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1182 static const char expected
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1183 for (o
= 0; o
< 2; o
++)
1185 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
1186 char *result
= NULL
;
1188 int retval
= mem_iconveh (input
, strlen (input
),
1189 "ISO-8859-1", "UTF-8",
1193 ASSERT (retval
== 0);
1194 ASSERT (length
== strlen (expected
));
1195 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
1198 for (i
= 0; i
< 37; i
++)
1199 ASSERT (offsets
[i
] == (i
< 1 ? i
:
1203 ASSERT (offsets
[37] == MAGIC
);
1210 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
1211 /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
1212 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1214 enum iconv_ilseq_handler handler
= handlers
[h
];
1215 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1216 static const char expected
[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
1217 for (o
= 0; o
< 2; o
++)
1219 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
1220 char *result
= NULL
;
1222 int retval
= mem_iconveh (input
, strlen (input
),
1223 "ISO-8859-1", "GB18030",
1227 ASSERT (retval
== 0);
1228 ASSERT (length
== strlen (expected
));
1229 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
1232 for (i
= 0; i
< 37; i
++)
1233 ASSERT (offsets
[i
] == (i
< 1 ? i
:
1237 ASSERT (offsets
[37] == MAGIC
);
1245 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
1246 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1248 enum iconv_ilseq_handler handler
= handlers
[h
];
1249 static const char input
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1250 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1251 for (o
= 0; o
< 2; o
++)
1253 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
1254 char *result
= NULL
;
1256 int retval
= mem_iconveh (input
, strlen (input
),
1257 "UTF-8", "ISO-8859-1",
1261 ASSERT (retval
== 0);
1262 ASSERT (length
== strlen (expected
));
1263 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
1266 for (i
= 0; i
< 41; i
++)
1267 ASSERT (offsets
[i
] == (i
< 1 ? i
:
1268 i
== 1 ? (size_t)(-1) :
1270 i
== 13 ? (size_t)(-1) :
1272 i
== 20 ? (size_t)(-1) :
1275 ASSERT (offsets
[41] == MAGIC
);
1282 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
1283 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1285 enum iconv_ilseq_handler handler
= handlers
[h
];
1286 static const char input
[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
1287 for (o
= 0; o
< 2; o
++)
1289 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
1290 char *result
= NULL
;
1292 int retval
= mem_iconveh (input
, strlen (input
),
1293 "UTF-8", "ISO-8859-1",
1300 ASSERT (retval
== -1 && errno
== EILSEQ
);
1301 ASSERT (result
== NULL
);
1305 case iconveh_question_mark
:
1306 case iconveh_replacement_character
:
1308 static const char expected
[] = "Rafa? Maszkowski";
1309 ASSERT (retval
== 0);
1310 ASSERT (length
== strlen (expected
));
1311 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
1314 for (i
= 0; i
< 17; i
++)
1315 ASSERT (offsets
[i
] == (i
< 5 ? i
:
1316 i
== 5 ? (size_t)(-1) :
1318 ASSERT (offsets
[17] == MAGIC
);
1324 case iconveh_escape_sequence
:
1326 static const char expected
[] = "Rafa\\u0142 Maszkowski";
1327 ASSERT (retval
== 0);
1328 ASSERT (length
== strlen (expected
));
1329 ASSERT (result
!= NULL
&& memcmp (result
, expected
, strlen (expected
)) == 0);
1332 for (i
= 0; i
< 17; i
++)
1333 ASSERT (offsets
[i
] == (i
< 5 ? i
:
1334 i
== 5 ? (size_t)(-1) :
1336 ASSERT (offsets
[17] == MAGIC
);
1346 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
1347 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1349 enum iconv_ilseq_handler handler
= handlers
[h
];
1350 static const char input
[] = "\342";
1351 for (o
= 0; o
< 2; o
++)
1353 size_t *offsets
= (o
? new_offsets (strlen (input
)) : NULL
);
1354 char *result
= NULL
;
1356 int retval
= mem_iconveh (input
, strlen (input
),
1357 "UTF-8", "ISO-8859-1",
1361 ASSERT (retval
== 0);
1362 ASSERT (length
== 0);
1365 ASSERT (offsets
[0] == 0);
1366 ASSERT (offsets
[1] == MAGIC
);
1373 /* ------------------------- Test str_iconveh() ------------------------- */
1375 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
1376 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1378 enum iconv_ilseq_handler handler
= handlers
[h
];
1379 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1380 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1381 char *result
= str_iconveh (input
, "ISO-8859-2", "ISO-8859-1", handler
);
1382 ASSERT (result
!= NULL
);
1383 ASSERT (strcmp (result
, expected
) == 0);
1387 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
1388 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1390 enum iconv_ilseq_handler handler
= handlers
[h
];
1391 static const char input
[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
1392 char *result
= str_iconveh (input
, "ISO-8859-2", "ISO-8859-1", handler
);
1396 ASSERT (result
== NULL
&& errno
== EILSEQ
);
1398 case iconveh_question_mark
:
1399 case iconveh_replacement_character
:
1401 static const char expected
[] = "Rafa? Maszkowski";
1402 ASSERT (result
!= NULL
);
1403 ASSERT (strcmp (result
, expected
) == 0);
1407 case iconveh_escape_sequence
:
1409 static const char expected
[] = "Rafa\\u0142 Maszkowski";
1410 ASSERT (result
!= NULL
);
1411 ASSERT (strcmp (result
, expected
) == 0);
1418 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
1419 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1421 enum iconv_ilseq_handler handler
= handlers
[h
];
1422 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1423 static const char expected
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1424 char *result
= str_iconveh (input
, "ISO-8859-1", "UTF-8", handler
);
1425 ASSERT (result
!= NULL
);
1426 ASSERT (strcmp (result
, expected
) == 0);
1430 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
1431 /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
1432 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1434 enum iconv_ilseq_handler handler
= handlers
[h
];
1435 static const char input
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1436 static const char expected
[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
1437 char *result
= str_iconveh (input
, "ISO-8859-1", "GB18030", handler
);
1438 ASSERT (result
!= NULL
);
1439 ASSERT (strcmp (result
, expected
) == 0);
1444 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
1445 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1447 enum iconv_ilseq_handler handler
= handlers
[h
];
1448 static const char input
[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1449 static const char expected
[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1450 char *result
= str_iconveh (input
, "UTF-8", "ISO-8859-1", handler
);
1451 ASSERT (result
!= NULL
);
1452 ASSERT (strcmp (result
, expected
) == 0);
1456 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
1457 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1459 enum iconv_ilseq_handler handler
= handlers
[h
];
1460 static const char input
[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
1461 char *result
= str_iconveh (input
, "UTF-8", "ISO-8859-1", handler
);
1465 ASSERT (result
== NULL
&& errno
== EILSEQ
);
1467 case iconveh_question_mark
:
1468 case iconveh_replacement_character
:
1470 static const char expected
[] = "Costs: 27 ?";
1471 ASSERT (result
!= NULL
);
1472 ASSERT (strcmp (result
, expected
) == 0);
1476 case iconveh_escape_sequence
:
1478 static const char expected
[] = "Costs: 27 \\u20AC";
1479 ASSERT (result
!= NULL
);
1480 ASSERT (strcmp (result
, expected
) == 0);
1487 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
1488 for (h
= 0; h
< SIZEOF (handlers
); h
++)
1490 enum iconv_ilseq_handler handler
= handlers
[h
];
1491 static const char input
[] = "\342";
1492 char *result
= str_iconveh (input
, "UTF-8", "ISO-8859-1", handler
);
1493 ASSERT (result
!= NULL
);
1494 ASSERT (strcmp (result
, "") == 0);
1498 /* -------------------------------- Done. -------------------------------- */
1500 if (cd_ascii_to_88591
!= (iconv_t
)(-1))
1501 iconv_close (cd_ascii_to_88591
);
1502 iconv_close (cd_ascii_to_utf8
);
1503 if (cd_utf7_to_utf8
!= (iconv_t
)(-1))
1504 iconv_close (cd_utf7_to_utf8
);
1508 return test_exit_status
;