usleep tests: Avoid failure due to known Cygwin 3.5.3 bug.
[gnulib.git] / tests / test-striconveh.c
blob85f0047af180ca86464b31aeeb8c91ea1a8012b9
1 /* Test of character set conversion with error handling.
2 Copyright (C) 2007-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
19 #include <config.h>
21 #include "striconveh.h"
23 #if HAVE_ICONV
24 # include <iconv.h>
25 #endif
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <string.h>
31 #include "macros.h"
/* Magic number for detecting bounds violations.  */
#define MAGIC 0x1983EFF1

/* Allocate an array of N + 1 elements of type size_t.
   The first N elements are left uninitialized.  The extra element at
   index N is set to MAGIC, so that a caller can verify afterwards that
   nothing was written beyond the first N elements.
   The result is heap-allocated; the caller must free() it.
   Aborts if the size computation would overflow or if memory is
   exhausted, so the result is never NULL.  */
static size_t *
new_offsets (size_t n)
{
  size_t *offsets;

  /* Guard against overflow of (n + 1) * sizeof (size_t).  */
  if (n >= (size_t)(-1) / sizeof (size_t))
    abort ();

  offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
  /* Fail loudly instead of dereferencing NULL when out of memory.  */
  if (offsets == NULL)
    abort ();

  offsets[n] = MAGIC;
  return offsets;
}
44 int
45 main ()
47 #if HAVE_ICONV
48 static enum iconv_ilseq_handler handlers[] =
50 iconveh_error,
51 iconveh_question_mark,
52 iconveh_replacement_character,
53 iconveh_escape_sequence
55 size_t indirect;
56 size_t h;
57 size_t o;
58 size_t i;
60 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
61 ISO-8859-2, UTF-8, and with libiconv or glibc also GB18030. */
62 iconv_t cd_ascii_to_88591 = iconv_open ("ISO-8859-1", "ASCII");
63 iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
64 iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
65 iconv_t cd_ascii_to_utf8 = iconv_open ("UTF-8", "ASCII");
66 iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
67 iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
68 iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
69 iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
70 iconv_t cd_utf7_to_utf8 = iconv_open ("UTF-8", "UTF-7");
71 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
72 iconv_t cd_ascii_to_gb18030 = iconv_open ("GB18030", "ASCII");
73 iconv_t cd_utf8_to_gb18030 = iconv_open ("GB18030", "UTF-8");
74 iconv_t cd_88591_to_gb18030 = iconv_open ("GB18030", "ISO-8859-1");
75 iconv_t cd_utf7_to_gb18030 = iconv_open ("GB18030", "UTF-7");
76 # endif
77 iconveh_t cdeh_ascii_to_88591;
78 iconveh_t cdeh_ascii_to_88591_indirectly;
79 iconveh_t cdeh_88592_to_88591;
80 iconveh_t cdeh_88592_to_88591_indirectly;
81 iconveh_t cdeh_ascii_to_utf8;
82 iconveh_t cdeh_88591_to_utf8;
83 iconveh_t cdeh_utf8_to_88591;
84 iconveh_t cdeh_utf7_to_utf8;
85 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
86 iconveh_t cdeh_ascii_to_gb18030;
87 iconveh_t cdeh_88591_to_gb18030;
88 iconveh_t cdeh_utf7_to_gb18030;
89 # endif
91 ASSERT (cd_ascii_to_utf8 != (iconv_t)(-1));
92 ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
93 ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
94 ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
95 ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
96 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
97 ASSERT (cd_ascii_to_gb18030 != (iconv_t)(-1));
98 ASSERT (cd_utf8_to_gb18030 != (iconv_t)(-1));
99 # endif
101 cdeh_ascii_to_88591.cd = cd_ascii_to_88591;
102 cdeh_ascii_to_88591.cd1 = cd_ascii_to_utf8;
103 cdeh_ascii_to_88591.cd2 = cd_utf8_to_88591;
105 cdeh_ascii_to_88591_indirectly.cd = (iconv_t)(-1);
106 cdeh_ascii_to_88591_indirectly.cd1 = cd_ascii_to_utf8;
107 cdeh_ascii_to_88591_indirectly.cd2 = cd_utf8_to_88591;
109 cdeh_88592_to_88591.cd = cd_88592_to_88591;
110 cdeh_88592_to_88591.cd1 = cd_88592_to_utf8;
111 cdeh_88592_to_88591.cd2 = cd_utf8_to_88591;
113 cdeh_88592_to_88591_indirectly.cd = (iconv_t)(-1);
114 cdeh_88592_to_88591_indirectly.cd1 = cd_88592_to_utf8;
115 cdeh_88592_to_88591_indirectly.cd2 = cd_utf8_to_88591;
117 cdeh_ascii_to_utf8.cd = cd_ascii_to_utf8;
118 cdeh_ascii_to_utf8.cd1 = cd_ascii_to_utf8;
119 cdeh_ascii_to_utf8.cd2 = (iconv_t)(-1);
121 cdeh_88591_to_utf8.cd = cd_88591_to_utf8;
122 cdeh_88591_to_utf8.cd1 = cd_88591_to_utf8;
123 cdeh_88591_to_utf8.cd2 = (iconv_t)(-1);
125 cdeh_utf8_to_88591.cd = cd_utf8_to_88591;
126 cdeh_utf8_to_88591.cd1 = (iconv_t)(-1);
127 cdeh_utf8_to_88591.cd2 = cd_utf8_to_88591;
129 cdeh_utf7_to_utf8.cd = cd_utf7_to_utf8;
130 cdeh_utf7_to_utf8.cd1 = cd_utf7_to_utf8;
131 cdeh_utf7_to_utf8.cd2 = (iconv_t)(-1);
133 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
134 cdeh_ascii_to_gb18030.cd = cd_ascii_to_gb18030;
135 cdeh_ascii_to_gb18030.cd1 = cd_ascii_to_utf8;
136 cdeh_ascii_to_gb18030.cd2 = cd_utf8_to_gb18030;
138 cdeh_88591_to_gb18030.cd = cd_88591_to_gb18030;
139 cdeh_88591_to_gb18030.cd1 = cd_88591_to_utf8;
140 cdeh_88591_to_gb18030.cd2 = cd_utf8_to_gb18030;
142 cdeh_utf7_to_gb18030.cd = cd_utf7_to_gb18030;
143 cdeh_utf7_to_gb18030.cd1 = cd_utf7_to_utf8;
144 cdeh_utf7_to_gb18030.cd2 = cd_utf8_to_gb18030;
145 # endif
147 /* ------------------------ Test mem_cd_iconveh() ------------------------ */
149 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
150 for (indirect = 0; indirect <= 1; indirect++)
152 for (h = 0; h < SIZEOF (handlers); h++)
154 enum iconv_ilseq_handler handler = handlers[h];
155 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
156 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
157 for (o = 0; o < 2; o++)
159 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
160 char *result = NULL;
161 size_t length = 0;
162 int retval = mem_cd_iconveh (input, strlen (input),
163 (indirect
164 ? &cdeh_88592_to_88591_indirectly
165 : &cdeh_88592_to_88591),
166 handler,
167 offsets,
168 &result, &length);
169 ASSERT (retval == 0);
170 ASSERT (length == strlen (expected));
171 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
172 if (o)
174 for (i = 0; i < 37; i++)
175 ASSERT (offsets[i] == i);
176 ASSERT (offsets[37] == MAGIC);
177 free (offsets);
179 free (result);
184 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
185 for (indirect = 0; indirect <= 1; indirect++)
187 for (h = 0; h < SIZEOF (handlers); h++)
189 enum iconv_ilseq_handler handler = handlers[h];
190 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
191 for (o = 0; o < 2; o++)
193 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
194 char *result = NULL;
195 size_t length = 0;
196 int retval = mem_cd_iconveh (input, strlen (input),
197 (indirect
198 ? &cdeh_ascii_to_88591_indirectly
199 : &cdeh_ascii_to_88591),
200 handler,
201 offsets,
202 &result, &length);
203 switch (handler)
205 case iconveh_error:
206 ASSERT (retval == -1 && errno == EILSEQ);
207 ASSERT (result == NULL);
208 if (o)
209 free (offsets);
210 break;
211 case iconveh_question_mark:
212 case iconveh_replacement_character:
213 case iconveh_escape_sequence:
215 static const char expected[] = "Rafa? Maszkowski";
216 ASSERT (retval == 0);
217 ASSERT (length == strlen (expected));
218 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
219 if (o)
221 for (i = 0; i < 16; i++)
222 ASSERT (offsets[i] == i);
223 ASSERT (offsets[16] == MAGIC);
224 free (offsets);
226 free (result);
228 break;
234 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
235 for (indirect = 0; indirect <= 1; indirect++)
237 for (h = 0; h < SIZEOF (handlers); h++)
239 enum iconv_ilseq_handler handler = handlers[h];
240 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
241 for (o = 0; o < 2; o++)
243 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
244 char *result = NULL;
245 size_t length = 0;
246 int retval = mem_cd_iconveh (input, strlen (input),
247 (indirect
248 ? &cdeh_88592_to_88591_indirectly
249 : &cdeh_88592_to_88591),
250 handler,
251 offsets,
252 &result, &length);
253 switch (handler)
255 case iconveh_error:
256 ASSERT (retval == -1 && errno == EILSEQ);
257 ASSERT (result == NULL);
258 if (o)
259 free (offsets);
260 break;
261 case iconveh_question_mark:
262 case iconveh_replacement_character:
264 static const char expected[] = "Rafa? Maszkowski";
265 ASSERT (retval == 0);
266 ASSERT (length == strlen (expected));
267 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
268 if (o)
270 for (i = 0; i < 16; i++)
271 ASSERT (offsets[i] == i);
272 ASSERT (offsets[16] == MAGIC);
273 free (offsets);
275 free (result);
277 break;
278 case iconveh_escape_sequence:
280 static const char expected[] = "Rafa\\u0142 Maszkowski";
281 ASSERT (retval == 0);
282 ASSERT (length == strlen (expected));
283 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
284 if (o)
286 for (i = 0; i < 16; i++)
287 ASSERT (offsets[i] == (i < 5 ? i :
288 i + 5));
289 ASSERT (offsets[16] == MAGIC);
290 free (offsets);
292 free (result);
294 break;
300 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
301 for (h = 0; h < SIZEOF (handlers); h++)
303 enum iconv_ilseq_handler handler = handlers[h];
304 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
305 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
306 for (o = 0; o < 2; o++)
308 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
309 char *result = NULL;
310 size_t length = 0;
311 int retval = mem_cd_iconveh (input, strlen (input),
312 &cdeh_88591_to_utf8,
313 handler,
314 offsets,
315 &result, &length);
316 ASSERT (retval == 0);
317 ASSERT (length == strlen (expected));
318 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
319 if (o)
321 for (i = 0; i < 37; i++)
322 ASSERT (offsets[i] == (i < 1 ? i :
323 i < 12 ? i + 1 :
324 i < 18 ? i + 2 :
325 i + 3));
326 ASSERT (offsets[37] == MAGIC);
327 free (offsets);
329 free (result);
333 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
334 /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
335 for (h = 0; h < SIZEOF (handlers); h++)
337 enum iconv_ilseq_handler handler = handlers[h];
338 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
339 static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
340 for (o = 0; o < 2; o++)
342 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
343 char *result = NULL;
344 size_t length = 0;
345 int retval = mem_cd_iconveh (input, strlen (input),
346 &cdeh_88591_to_gb18030,
347 handler,
348 offsets,
349 &result, &length);
350 ASSERT (retval == 0);
351 ASSERT (length == strlen (expected));
352 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
353 if (o)
355 for (i = 0; i < 37; i++)
356 ASSERT (offsets[i] == (i < 1 ? i :
357 i < 12 ? i + 3 :
358 i < 18 ? i + 6 :
359 i + 7));
360 ASSERT (offsets[37] == MAGIC);
361 free (offsets);
363 free (result);
366 # endif
368 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
369 for (h = 0; h < SIZEOF (handlers); h++)
371 enum iconv_ilseq_handler handler = handlers[h];
372 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
373 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
374 for (o = 0; o < 2; o++)
376 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
377 char *result = NULL;
378 size_t length = 0;
379 int retval = mem_cd_iconveh (input, strlen (input),
380 &cdeh_utf8_to_88591,
381 handler,
382 offsets,
383 &result, &length);
384 ASSERT (retval == 0);
385 ASSERT (length == strlen (expected));
386 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
387 if (o)
389 for (i = 0; i < 41; i++)
390 ASSERT (offsets[i] == (i < 1 ? i :
391 i == 1 ? (size_t)(-1) :
392 i < 13 ? i - 1 :
393 i == 13 ? (size_t)(-1) :
394 i < 20 ? i - 2 :
395 i == 20 ? (size_t)(-1) :
396 i < 40 ? i - 3 :
397 (size_t)(-1)));
398 ASSERT (offsets[41] == MAGIC);
399 free (offsets);
401 free (result);
405 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
406 for (h = 0; h < SIZEOF (handlers); h++)
408 enum iconv_ilseq_handler handler = handlers[h];
409 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
410 for (o = 0; o < 2; o++)
412 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
413 char *result = NULL;
414 size_t length = 0;
415 int retval = mem_cd_iconveh (input, strlen (input),
416 &cdeh_ascii_to_utf8,
417 handler,
418 offsets,
419 &result, &length);
420 switch (handler)
422 case iconveh_error:
423 ASSERT (retval == -1 && errno == EILSEQ);
424 ASSERT (result == NULL);
425 if (o)
426 free (offsets);
427 break;
428 case iconveh_question_mark:
429 case iconveh_escape_sequence:
431 static const char expected[] = "Rafa? Maszkowski";
432 ASSERT (retval == 0);
433 ASSERT (length == strlen (expected));
434 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
435 if (o)
437 for (i = 0; i < 16; i++)
438 ASSERT (offsets[i] == i);
439 ASSERT (offsets[16] == MAGIC);
440 free (offsets);
442 free (result);
444 break;
445 case iconveh_replacement_character:
447 static const char expected[] = "Rafa\357\277\275 Maszkowski";
448 ASSERT (retval == 0);
449 ASSERT (length == strlen (expected));
450 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
451 if (o)
453 for (i = 0; i < 16; i++)
454 ASSERT (offsets[i] == (i < 5 ? i : i + 2));
455 ASSERT (offsets[16] == MAGIC);
456 free (offsets);
458 free (result);
460 break;
465 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || ((__GLIBC__ + (__GLIBC_MINOR__ >= 16) > 2) && !defined __UCLIBC__)
466 /* Test conversion from ASCII to GB18030 with invalid input (EILSEQ).
467 Note: glibc's GB18030 converter was buggy in glibc-2.15; fixed by
468 Andreas Schwab on 2012-02-06. */
469 for (h = 0; h < SIZEOF (handlers); h++)
471 enum iconv_ilseq_handler handler = handlers[h];
472 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
473 for (o = 0; o < 2; o++)
475 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
476 char *result = NULL;
477 size_t length = 0;
478 int retval = mem_cd_iconveh (input, strlen (input),
479 &cdeh_ascii_to_gb18030,
480 handler,
481 offsets,
482 &result, &length);
483 switch (handler)
485 case iconveh_error:
486 ASSERT (retval == -1 && errno == EILSEQ);
487 ASSERT (result == NULL);
488 if (o)
489 free (offsets);
490 break;
491 case iconveh_question_mark:
492 case iconveh_escape_sequence:
494 static const char expected[] = "Rafa? Maszkowski";
495 ASSERT (retval == 0);
496 ASSERT (length == strlen (expected));
497 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
498 if (o)
500 for (i = 0; i < 16; i++)
501 ASSERT (offsets[i] == i);
502 ASSERT (offsets[16] == MAGIC);
503 free (offsets);
505 free (result);
507 break;
508 case iconveh_replacement_character:
510 static const char expected[] = "Rafa\2041\2447 Maszkowski";
511 ASSERT (retval == 0);
512 ASSERT (length == strlen (expected));
513 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
514 if (o)
516 for (i = 0; i < 16; i++)
517 ASSERT (offsets[i] == (i < 5 ? i : i + 3));
518 ASSERT (offsets[16] == MAGIC);
519 free (offsets);
521 free (result);
523 break;
527 # endif
529 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
530 for (h = 0; h < SIZEOF (handlers); h++)
532 enum iconv_ilseq_handler handler = handlers[h];
533 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
534 for (o = 0; o < 2; o++)
536 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
537 char *result = NULL;
538 size_t length = 0;
539 int retval = mem_cd_iconveh (input, strlen (input),
540 &cdeh_utf8_to_88591,
541 handler,
542 offsets,
543 &result, &length);
544 switch (handler)
546 case iconveh_error:
547 ASSERT (retval == -1 && errno == EILSEQ);
548 ASSERT (result == NULL);
549 if (o)
550 free (offsets);
551 break;
552 case iconveh_question_mark:
553 case iconveh_replacement_character:
555 static const char expected[] = "Rafa? Maszkowski";
556 ASSERT (retval == 0);
557 ASSERT (length == strlen (expected));
558 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
559 if (o)
561 for (i = 0; i < 17; i++)
562 ASSERT (offsets[i] == (i < 5 ? i :
563 i == 5 ? (size_t)(-1) :
564 i - 1));
565 ASSERT (offsets[17] == MAGIC);
566 free (offsets);
568 free (result);
570 break;
571 case iconveh_escape_sequence:
573 static const char expected[] = "Rafa\\u0142 Maszkowski";
574 ASSERT (retval == 0);
575 ASSERT (length == strlen (expected));
576 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
577 if (o)
579 for (i = 0; i < 17; i++)
580 ASSERT (offsets[i] == (i < 5 ? i :
581 i == 5 ? (size_t)(-1) :
582 i + 4));
583 ASSERT (offsets[17] == MAGIC);
584 free (offsets);
586 free (result);
588 break;
593 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
594 for (h = 0; h < SIZEOF (handlers); h++)
596 enum iconv_ilseq_handler handler = handlers[h];
597 static const char input[] = "\342";
598 for (o = 0; o < 2; o++)
600 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
601 char *result = NULL;
602 size_t length = 0;
603 int retval = mem_cd_iconveh (input, strlen (input),
604 &cdeh_utf8_to_88591,
605 handler,
606 offsets,
607 &result, &length);
608 ASSERT (retval == 0);
609 ASSERT (length == 0);
610 if (o)
612 ASSERT (offsets[0] == 0);
613 ASSERT (offsets[1] == MAGIC);
614 free (offsets);
616 free (result);
620 if (cd_utf7_to_utf8 != (iconv_t)(-1))
622 /* Disabled on Solaris, because Solaris 9 iconv() is buggy: it returns
623 -1 / EILSEQ when converting the 7th byte of the input "+VDLYP9hA". */
624 # if !(defined __sun && !defined _LIBICONV_VERSION)
625 /* Test conversion from UTF-7 to UTF-8 with EINVAL. */
626 for (h = 0; h < SIZEOF (handlers); h++)
628 enum iconv_ilseq_handler handler = handlers[h];
629 /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40. It would
630 convert to U+5432 U+D83F U+D840 but these are Unicode surrogates. */
631 static const char input[] = "+VDLYP9hA";
632 static const char expected1[] = "\345\220\262"; /* 吲 glibc */
633 static const char expected2[] = ""; /* libiconv */
634 char *result = NULL;
635 size_t length = 0;
636 int retval = mem_cd_iconveh (input, 7,
637 &cdeh_utf7_to_utf8,
638 handler,
639 NULL,
640 &result, &length);
641 ASSERT (retval == 0);
642 ASSERT (length == strlen (expected1) || length == strlen (expected2));
643 ASSERT (result != NULL);
644 if (length == strlen (expected1))
645 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
646 else
647 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
648 free (result);
651 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
652 /* Test conversion from UTF-7 to GB18030 with EINVAL. */
653 for (h = 0; h < SIZEOF (handlers); h++)
655 enum iconv_ilseq_handler handler = handlers[h];
656 /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40. It would
657 convert to U+5432 U+D83F U+D840 but these are Unicode surrogates. */
658 static const char input[] = "+VDLYP9hA";
659 static const char expected1[] = "\337\305"; /* 吲 glibc */
660 static const char expected2[] = ""; /* libiconv */
661 char *result = NULL;
662 size_t length = 0;
663 int retval = mem_cd_iconveh (input, 7,
664 &cdeh_utf7_to_gb18030,
665 handler,
666 NULL,
667 &result, &length);
668 ASSERT (retval == 0);
669 ASSERT (length == strlen (expected1) || length == strlen (expected2));
670 ASSERT (result != NULL);
671 if (length == strlen (expected1))
672 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
673 else
674 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
675 free (result);
677 # endif
679 /* Disabled on NetBSD, because NetBSD 5.0 iconv() is buggy: it converts
680 the input "+2D/YQNhB" to U+1FED8 U+3FD8 U+40D8. */
681 # if !(defined __NetBSD__ && !defined _LIBICONV_VERSION)
682 /* Test conversion from UTF-7 to UTF-8 with EILSEQ. */
683 for (h = 0; h < SIZEOF (handlers); h++)
685 enum iconv_ilseq_handler handler = handlers[h];
686 /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41. It would
687 convert to U+D83F U+D840 U+D841 but these are Unicode surrogates. */
688 static const char input[] = "+2D/YQNhB";
689 char *result = NULL;
690 size_t length = 0;
691 int retval = mem_cd_iconveh (input, strlen (input),
692 &cdeh_utf7_to_utf8,
693 handler,
694 NULL,
695 &result, &length);
696 switch (handler)
698 case iconveh_error:
699 ASSERT (retval == -1 && errno == EILSEQ);
700 ASSERT (result == NULL);
701 break;
702 case iconveh_question_mark:
703 case iconveh_escape_sequence:
705 /* glibc result */
706 static const char expected1[] = "?????";
707 /* libiconv <= 1.12 result */
708 static const char expected2[] = "?2D/YQNhB";
709 /* libiconv behaviour changed in version 1.13: the result is
710 '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left
711 by 6 bits. */
712 static const char expected3[] = "?\340\277\266\341\200\266";
713 ASSERT (retval == 0);
714 ASSERT (length == strlen (expected1)
715 || length == strlen (expected2)
716 || length == strlen (expected3));
717 ASSERT (result != NULL);
718 if (length == strlen (expected1))
719 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
720 else if (length == strlen (expected2))
721 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
722 else
723 ASSERT (memcmp (result, expected3, strlen (expected3)) == 0);
724 free (result);
726 break;
727 case iconveh_replacement_character:
729 /* glibc result */
730 static const char expected1[] = "\357\277\275\357\277\275\357\277\275\357\277\275\357\277\275";
731 /* libiconv <= 1.12 result */
732 static const char expected2[] = "\357\277\2752D/YQNhB";
733 /* libiconv >= 1.13 result */
734 static const char expected3[] = "\357\277\275\340\277\266\341\200\266";
735 ASSERT (retval == 0);
736 ASSERT (length == strlen (expected1)
737 || length == strlen (expected2)
738 || length == strlen (expected3));
739 ASSERT (result != NULL);
740 if (length == strlen (expected1))
741 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
742 else if (length == strlen (expected2))
743 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0);
744 else
745 ASSERT (memcmp (result, expected3, strlen (expected3)) == 0);
746 free (result);
751 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || ((__GLIBC__ + (__GLIBC_MINOR__ >= 16) > 2) && !defined __UCLIBC__)
752 /* Test conversion from UTF-7 to GB18030 with EILSEQ.
753 Note: glibc's GB18030 converter was buggy in glibc-2.15; fixed by
754 Andreas Schwab on 2012-02-06. */
755 for (h = 0; h < SIZEOF (handlers); h++)
757 enum iconv_ilseq_handler handler = handlers[h];
758 /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41. It would
759 convert to U+D83F U+D840 U+D841 but these are Unicode surrogates. */
760 static const char input[] = "+2D/YQNhB";
761 char *result = NULL;
762 size_t length = 0;
763 int retval = mem_cd_iconveh (input, strlen (input),
764 &cdeh_utf7_to_gb18030,
765 handler,
766 NULL,
767 &result, &length);
768 switch (handler)
770 case iconveh_error:
771 ASSERT (retval == -1 && errno == EILSEQ);
772 ASSERT (result == NULL);
773 break;
774 case iconveh_question_mark:
775 case iconveh_escape_sequence:
777 /* glibc result */
778 static const char expected1[] = "?????";
779 /* libiconv <= 1.12 result */
780 static const char expected2[] = "?2D/YQNhB";
781 /* libiconv behaviour changed in version 1.13: the result is
782 '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left
783 by 6 bits. */
784 static const char expected3[] = "?\2013\2030\2013\2114";
785 ASSERT (retval == 0);
786 ASSERT (length == strlen (expected1)
787 || length == strlen (expected2)
788 || length == strlen (expected3));
789 ASSERT (result != NULL);
790 if (length == strlen (expected1))
791 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
792 else if (length == strlen (expected2))
793 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0
794 || memcmp (result, expected3, strlen (expected3)) == 0);
795 free (result);
797 break;
798 case iconveh_replacement_character:
800 /* glibc result */
801 static const char expected1[] = "\2041\2447\2041\2447\2041\2447\2041\2447\2041\2447";
802 /* libiconv <= 1.12 result */
803 static const char expected2[] = "\2041\24472D/YQNhB";
804 /* libiconv >= 1.13 result */
805 static const char expected3[] = "\2041\2447\2013\2030\2013\2114";
806 ASSERT (retval == 0);
807 ASSERT (length == strlen (expected1)
808 || length == strlen (expected2)
809 || length == strlen (expected3));
810 ASSERT (result != NULL);
811 if (length == strlen (expected1))
812 ASSERT (memcmp (result, expected1, strlen (expected1)) == 0);
813 else if (length == strlen (expected2))
814 ASSERT (memcmp (result, expected2, strlen (expected2)) == 0
815 || memcmp (result, expected3, strlen (expected3)) == 0);
816 free (result);
820 # endif
821 # endif
822 # endif
825 /* ------------------------ Test str_cd_iconveh() ------------------------ */
827 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
828 for (indirect = 0; indirect <= 1; indirect++)
830 for (h = 0; h < SIZEOF (handlers); h++)
832 enum iconv_ilseq_handler handler = handlers[h];
833 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
834 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
835 char *result = str_cd_iconveh (input,
836 (indirect
837 ? &cdeh_88592_to_88591_indirectly
838 : &cdeh_88592_to_88591),
839 handler);
840 ASSERT (result != NULL);
841 ASSERT (strcmp (result, expected) == 0);
842 free (result);
846 /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ). */
847 for (indirect = 0; indirect <= 1; indirect++)
849 for (h = 0; h < SIZEOF (handlers); h++)
851 enum iconv_ilseq_handler handler = handlers[h];
852 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
853 char *result = str_cd_iconveh (input,
854 (indirect
855 ? &cdeh_ascii_to_88591_indirectly
856 : &cdeh_ascii_to_88591),
857 handler);
858 switch (handler)
860 case iconveh_error:
861 ASSERT (result == NULL && errno == EILSEQ);
862 break;
863 case iconveh_question_mark:
864 case iconveh_replacement_character:
865 case iconveh_escape_sequence:
867 static const char expected[] = "Rafa? Maszkowski";
868 ASSERT (result != NULL);
869 ASSERT (strcmp (result, expected) == 0);
870 free (result);
872 break;
877 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
878 for (indirect = 0; indirect <= 1; indirect++)
880 for (h = 0; h < SIZEOF (handlers); h++)
882 enum iconv_ilseq_handler handler = handlers[h];
883 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
884 char *result = str_cd_iconveh (input,
885 (indirect
886 ? &cdeh_88592_to_88591_indirectly
887 : &cdeh_88592_to_88591),
888 handler);
889 switch (handler)
891 case iconveh_error:
892 ASSERT (result == NULL && errno == EILSEQ);
893 break;
894 case iconveh_question_mark:
895 case iconveh_replacement_character:
897 static const char expected[] = "Rafa? Maszkowski";
898 ASSERT (result != NULL);
899 ASSERT (strcmp (result, expected) == 0);
900 free (result);
902 break;
903 case iconveh_escape_sequence:
905 static const char expected[] = "Rafa\\u0142 Maszkowski";
906 ASSERT (result != NULL);
907 ASSERT (strcmp (result, expected) == 0);
908 free (result);
910 break;
915 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
916 for (h = 0; h < SIZEOF (handlers); h++)
918 enum iconv_ilseq_handler handler = handlers[h];
919 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
920 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
921 char *result = str_cd_iconveh (input,
922 &cdeh_88591_to_utf8,
923 handler);
924 ASSERT (result != NULL);
925 ASSERT (strcmp (result, expected) == 0);
926 free (result);
929 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
930 /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
931 for (h = 0; h < SIZEOF (handlers); h++)
933 enum iconv_ilseq_handler handler = handlers[h];
934 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
935 static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
936 char *result = str_cd_iconveh (input,
937 &cdeh_88591_to_gb18030,
938 handler);
939 ASSERT (result != NULL);
940 ASSERT (strcmp (result, expected) == 0);
941 free (result);
943 # endif
945 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
946 for (h = 0; h < SIZEOF (handlers); h++)
948 enum iconv_ilseq_handler handler = handlers[h];
949 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
950 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
951 char *result = str_cd_iconveh (input,
952 &cdeh_utf8_to_88591,
953 handler);
954 ASSERT (result != NULL);
955 ASSERT (strcmp (result, expected) == 0);
956 free (result);
959 /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ). */
960 for (h = 0; h < SIZEOF (handlers); h++)
962 enum iconv_ilseq_handler handler = handlers[h];
963 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
964 char *result = str_cd_iconveh (input,
965 &cdeh_ascii_to_utf8,
966 handler);
967 switch (handler)
969 case iconveh_error:
970 ASSERT (result == NULL && errno == EILSEQ);
971 break;
972 case iconveh_question_mark:
973 case iconveh_escape_sequence:
975 static const char expected[] = "Rafa? Maszkowski";
976 ASSERT (result != NULL);
977 ASSERT (strcmp (result, expected) == 0);
978 free (result);
980 break;
981 case iconveh_replacement_character:
983 static const char expected[] = "Rafa\357\277\275 Maszkowski";
984 ASSERT (result != NULL);
985 ASSERT (strcmp (result, expected) == 0);
986 free (result);
988 break;
992 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || ((__GLIBC__ + (__GLIBC_MINOR__ >= 16) > 2) && !defined __UCLIBC__)
993 /* Test conversion from ASCII to GB18030 with invalid input (EILSEQ).
994 Note: glibc's GB18030 converter was buggy in glibc-2.15; fixed by
995 Andreas Schwab on 2012-02-06. */
996 for (h = 0; h < SIZEOF (handlers); h++)
998 enum iconv_ilseq_handler handler = handlers[h];
999 static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
1000 char *result = str_cd_iconveh (input,
1001 &cdeh_ascii_to_gb18030,
1002 handler);
1003 switch (handler)
1005 case iconveh_error:
1006 ASSERT (result == NULL && errno == EILSEQ);
1007 break;
1008 case iconveh_question_mark:
1009 case iconveh_escape_sequence:
1011 static const char expected[] = "Rafa? Maszkowski";
1012 ASSERT (result != NULL);
1013 ASSERT (strcmp (result, expected) == 0);
1014 free (result);
1016 break;
1017 case iconveh_replacement_character:
1019 static const char expected[] = "Rafa\2041\2447 Maszkowski";
1020 ASSERT (result != NULL);
1021 ASSERT (strcmp (result, expected) == 0);
1022 free (result);
1024 break;
1027 # endif
1029 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
1030 for (h = 0; h < SIZEOF (handlers); h++)
1032 enum iconv_ilseq_handler handler = handlers[h];
1033 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
1034 char *result = str_cd_iconveh (input,
1035 &cdeh_utf8_to_88591,
1036 handler);
1037 switch (handler)
1039 case iconveh_error:
1040 ASSERT (result == NULL && errno == EILSEQ);
1041 break;
1042 case iconveh_question_mark:
1043 case iconveh_replacement_character:
1045 static const char expected[] = "Costs: 27 ?";
1046 ASSERT (result != NULL);
1047 ASSERT (strcmp (result, expected) == 0);
1048 free (result);
1050 break;
1051 case iconveh_escape_sequence:
1053 static const char expected[] = "Costs: 27 \\u20AC";
1054 ASSERT (result != NULL);
1055 ASSERT (strcmp (result, expected) == 0);
1056 free (result);
1058 break;
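1062 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL: the input is a lone lead byte of a multibyte sequence (incomplete input), and the expected result is an empty string, not a failure. */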
1063 for (h = 0; h < SIZEOF (handlers); h++)
1065 enum iconv_ilseq_handler handler = handlers[h];
1066 static const char input[] = "\342";
1067 char *result = str_cd_iconveh (input,
1068 &cdeh_utf8_to_88591,
1069 handler);
1070 ASSERT (result != NULL);
1071 ASSERT (strcmp (result, "") == 0);
1072 free (result);
1075 if (cd_88591_to_88592 != (iconv_t)(-1))
1076 iconv_close (cd_88591_to_88592);
1077 if (cd_88592_to_88591 != (iconv_t)(-1))
1078 iconv_close (cd_88592_to_88591);
1079 iconv_close (cd_88591_to_utf8);
1080 iconv_close (cd_utf8_to_88591);
1081 iconv_close (cd_88592_to_utf8);
1082 iconv_close (cd_utf8_to_88592);
1084 /* ------------------------- Test mem_iconveh() ------------------------- */
1086 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
1087 for (h = 0; h < SIZEOF (handlers); h++)
1089 enum iconv_ilseq_handler handler = handlers[h];
1090 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1091 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1092 for (o = 0; o < 2; o++)
1094 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
1095 char *result = NULL;
1096 size_t length = 0;
1097 int retval = mem_iconveh (input, strlen (input),
1098 "ISO-8859-2", "ISO-8859-1",
1099 handler,
1100 offsets,
1101 &result, &length);
1102 ASSERT (retval == 0);
1103 ASSERT (length == strlen (expected));
1104 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
1105 if (o)
1107 for (i = 0; i < 37; i++)
1108 ASSERT (offsets[i] == i);
1109 ASSERT (offsets[37] == MAGIC);
1110 free (offsets);
1112 free (result);
1116 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
1117 for (h = 0; h < SIZEOF (handlers); h++)
1119 enum iconv_ilseq_handler handler = handlers[h];
1120 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
1121 for (o = 0; o < 2; o++)
1123 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
1124 char *result = NULL;
1125 size_t length = 0;
1126 int retval = mem_iconveh (input, strlen (input),
1127 "ISO-8859-2", "ISO-8859-1",
1128 handler,
1129 offsets,
1130 &result, &length);
1131 switch (handler)
1133 case iconveh_error:
1134 ASSERT (retval == -1 && errno == EILSEQ);
1135 ASSERT (result == NULL);
1136 if (o)
1137 free (offsets);
1138 break;
1139 case iconveh_question_mark:
1140 case iconveh_replacement_character:
1142 static const char expected[] = "Rafa? Maszkowski";
1143 ASSERT (retval == 0);
1144 ASSERT (length == strlen (expected));
1145 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
1146 if (o)
1148 for (i = 0; i < 16; i++)
1149 ASSERT (offsets[i] == i);
1150 ASSERT (offsets[16] == MAGIC);
1151 free (offsets);
1153 free (result);
1155 break;
1156 case iconveh_escape_sequence:
1158 static const char expected[] = "Rafa\\u0142 Maszkowski";
1159 ASSERT (retval == 0);
1160 ASSERT (length == strlen (expected));
1161 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
1162 if (o)
1164 for (i = 0; i < 16; i++)
1165 ASSERT (offsets[i] == (i < 5 ? i :
1166 i + 5));
1167 ASSERT (offsets[16] == MAGIC);
1168 free (offsets);
1170 free (result);
1172 break;
1177 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
1178 for (h = 0; h < SIZEOF (handlers); h++)
1180 enum iconv_ilseq_handler handler = handlers[h];
1181 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1182 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1183 for (o = 0; o < 2; o++)
1185 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
1186 char *result = NULL;
1187 size_t length = 0;
1188 int retval = mem_iconveh (input, strlen (input),
1189 "ISO-8859-1", "UTF-8",
1190 handler,
1191 offsets,
1192 &result, &length);
1193 ASSERT (retval == 0);
1194 ASSERT (length == strlen (expected));
1195 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
1196 if (o)
1198 for (i = 0; i < 37; i++)
1199 ASSERT (offsets[i] == (i < 1 ? i :
1200 i < 12 ? i + 1 :
1201 i < 18 ? i + 2 :
1202 i + 3));
1203 ASSERT (offsets[37] == MAGIC);
1204 free (offsets);
1206 free (result);
1210 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
1211 /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
1212 for (h = 0; h < SIZEOF (handlers); h++)
1214 enum iconv_ilseq_handler handler = handlers[h];
1215 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1216 static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
1217 for (o = 0; o < 2; o++)
1219 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
1220 char *result = NULL;
1221 size_t length = 0;
1222 int retval = mem_iconveh (input, strlen (input),
1223 "ISO-8859-1", "GB18030",
1224 handler,
1225 offsets,
1226 &result, &length);
1227 ASSERT (retval == 0);
1228 ASSERT (length == strlen (expected));
1229 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
1230 if (o)
1232 for (i = 0; i < 37; i++)
1233 ASSERT (offsets[i] == (i < 1 ? i :
1234 i < 12 ? i + 3 :
1235 i < 18 ? i + 6 :
1236 i + 7));
1237 ASSERT (offsets[37] == MAGIC);
1238 free (offsets);
1240 free (result);
1243 # endif
1245 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
1246 for (h = 0; h < SIZEOF (handlers); h++)
1248 enum iconv_ilseq_handler handler = handlers[h];
1249 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1250 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1251 for (o = 0; o < 2; o++)
1253 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
1254 char *result = NULL;
1255 size_t length = 0;
1256 int retval = mem_iconveh (input, strlen (input),
1257 "UTF-8", "ISO-8859-1",
1258 handler,
1259 offsets,
1260 &result, &length);
1261 ASSERT (retval == 0);
1262 ASSERT (length == strlen (expected));
1263 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
1264 if (o)
1266 for (i = 0; i < 41; i++)
1267 ASSERT (offsets[i] == (i < 1 ? i :
1268 i == 1 ? (size_t)(-1) :
1269 i < 13 ? i - 1 :
1270 i == 13 ? (size_t)(-1) :
1271 i < 20 ? i - 2 :
1272 i == 20 ? (size_t)(-1) :
1273 i < 40 ? i - 3 :
1274 (size_t)(-1)));
1275 ASSERT (offsets[41] == MAGIC);
1276 free (offsets);
1278 free (result);
1282 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
1283 for (h = 0; h < SIZEOF (handlers); h++)
1285 enum iconv_ilseq_handler handler = handlers[h];
1286 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
1287 for (o = 0; o < 2; o++)
1289 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
1290 char *result = NULL;
1291 size_t length = 0;
1292 int retval = mem_iconveh (input, strlen (input),
1293 "UTF-8", "ISO-8859-1",
1294 handler,
1295 offsets,
1296 &result, &length);
1297 switch (handler)
1299 case iconveh_error:
1300 ASSERT (retval == -1 && errno == EILSEQ);
1301 ASSERT (result == NULL);
1302 if (o)
1303 free (offsets);
1304 break;
1305 case iconveh_question_mark:
1306 case iconveh_replacement_character:
1308 static const char expected[] = "Rafa? Maszkowski";
1309 ASSERT (retval == 0);
1310 ASSERT (length == strlen (expected));
1311 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
1312 if (o)
1314 for (i = 0; i < 17; i++)
1315 ASSERT (offsets[i] == (i < 5 ? i :
1316 i == 5 ? (size_t)(-1) :
1317 i - 1));
1318 ASSERT (offsets[17] == MAGIC);
1319 free (offsets);
1321 free (result);
1323 break;
1324 case iconveh_escape_sequence:
1326 static const char expected[] = "Rafa\\u0142 Maszkowski";
1327 ASSERT (retval == 0);
1328 ASSERT (length == strlen (expected));
1329 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
1330 if (o)
1332 for (i = 0; i < 17; i++)
1333 ASSERT (offsets[i] == (i < 5 ? i :
1334 i == 5 ? (size_t)(-1) :
1335 i + 4));
1336 ASSERT (offsets[17] == MAGIC);
1337 free (offsets);
1339 free (result);
1341 break;
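1346 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL: the input is a lone lead byte of a multibyte sequence (incomplete input); the call is expected to return 0 with an output length of 0. */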
1347 for (h = 0; h < SIZEOF (handlers); h++)
1349 enum iconv_ilseq_handler handler = handlers[h];
1350 static const char input[] = "\342";
1351 for (o = 0; o < 2; o++)
1353 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
1354 char *result = NULL;
1355 size_t length = 0;
1356 int retval = mem_iconveh (input, strlen (input),
1357 "UTF-8", "ISO-8859-1",
1358 handler,
1359 offsets,
1360 &result, &length);
1361 ASSERT (retval == 0);
1362 ASSERT (length == 0);
1363 if (o)
1365 ASSERT (offsets[0] == 0);
1366 ASSERT (offsets[1] == MAGIC);
1367 free (offsets);
1369 free (result);
1373 /* ------------------------- Test str_iconveh() ------------------------- */
1375 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
1376 for (h = 0; h < SIZEOF (handlers); h++)
1378 enum iconv_ilseq_handler handler = handlers[h];
1379 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1380 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1381 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
1382 ASSERT (result != NULL);
1383 ASSERT (strcmp (result, expected) == 0);
1384 free (result);
1387 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
1388 for (h = 0; h < SIZEOF (handlers); h++)
1390 enum iconv_ilseq_handler handler = handlers[h];
1391 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
1392 char *result = str_iconveh (input, "ISO-8859-2", "ISO-8859-1", handler);
1393 switch (handler)
1395 case iconveh_error:
1396 ASSERT (result == NULL && errno == EILSEQ);
1397 break;
1398 case iconveh_question_mark:
1399 case iconveh_replacement_character:
1401 static const char expected[] = "Rafa? Maszkowski";
1402 ASSERT (result != NULL);
1403 ASSERT (strcmp (result, expected) == 0);
1404 free (result);
1406 break;
1407 case iconveh_escape_sequence:
1409 static const char expected[] = "Rafa\\u0142 Maszkowski";
1410 ASSERT (result != NULL);
1411 ASSERT (strcmp (result, expected) == 0);
1412 free (result);
1414 break;
1418 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
1419 for (h = 0; h < SIZEOF (handlers); h++)
1421 enum iconv_ilseq_handler handler = handlers[h];
1422 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1423 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1424 char *result = str_iconveh (input, "ISO-8859-1", "UTF-8", handler);
1425 ASSERT (result != NULL);
1426 ASSERT (strcmp (result, expected) == 0);
1427 free (result);
1430 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) || (defined __GLIBC__ && !defined __UCLIBC__)
1431 /* Test conversion from ISO-8859-1 to GB18030 with no errors. */
1432 for (h = 0; h < SIZEOF (handlers); h++)
1434 enum iconv_ilseq_handler handler = handlers[h];
1435 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1436 static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118";
1437 char *result = str_iconveh (input, "ISO-8859-1", "GB18030", handler);
1438 ASSERT (result != NULL);
1439 ASSERT (strcmp (result, expected) == 0);
1440 free (result);
1442 # endif
1444 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
1445 for (h = 0; h < SIZEOF (handlers); h++)
1447 enum iconv_ilseq_handler handler = handlers[h];
1448 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
1449 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
1450 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1451 ASSERT (result != NULL);
1452 ASSERT (strcmp (result, expected) == 0);
1453 free (result);
1456 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
1457 for (h = 0; h < SIZEOF (handlers); h++)
1459 enum iconv_ilseq_handler handler = handlers[h];
1460 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
1461 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1462 switch (handler)
1464 case iconveh_error:
1465 ASSERT (result == NULL && errno == EILSEQ);
1466 break;
1467 case iconveh_question_mark:
1468 case iconveh_replacement_character:
1470 static const char expected[] = "Costs: 27 ?";
1471 ASSERT (result != NULL);
1472 ASSERT (strcmp (result, expected) == 0);
1473 free (result);
1475 break;
1476 case iconveh_escape_sequence:
1478 static const char expected[] = "Costs: 27 \\u20AC";
1479 ASSERT (result != NULL);
1480 ASSERT (strcmp (result, expected) == 0);
1481 free (result);
1483 break;
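1487 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL: the input is a lone lead byte of a multibyte sequence (incomplete input), and the expected result is an empty string, not a failure. */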
1488 for (h = 0; h < SIZEOF (handlers); h++)
1490 enum iconv_ilseq_handler handler = handlers[h];
1491 static const char input[] = "\342";
1492 char *result = str_iconveh (input, "UTF-8", "ISO-8859-1", handler);
1493 ASSERT (result != NULL);
1494 ASSERT (strcmp (result, "") == 0);
1495 free (result);
1498 /* -------------------------------- Done. -------------------------------- */
1500 if (cd_ascii_to_88591 != (iconv_t)(-1))
1501 iconv_close (cd_ascii_to_88591);
1502 iconv_close (cd_ascii_to_utf8);
1503 if (cd_utf7_to_utf8 != (iconv_t)(-1))
1504 iconv_close (cd_utf7_to_utf8);
1506 #endif
1508 return test_exit_status;