mirror of https://gitee.com/Nocallback/glibc.git
Browse Source
This commit replaces string manipulation during `iconv_open' and iconv_prog option parsing with a structured, flag-based conversion specification. In doing so, it alters the internal `__gconv_open' interface and adjusts its callers accordingly. This change fixes several hangs in the iconv program and therefore includes a new test to exercise the iconv_prog options that originally led to these hangs. It also includes a new regression test for option handling in the iconv function.
Reviewed-by: Florian Weimer <fweimer@redhat.com>
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
nsz/pacbti-v7
11 changed files with 988 additions and 143 deletions
@ -0,0 +1,218 @@ |
|||
/* Charset name normalization.
|
|||
Copyright (C) 2020 Free Software Foundation, Inc. |
|||
This file is part of the GNU C Library. |
|||
|
|||
The GNU C Library is free software; you can redistribute it and/or |
|||
modify it under the terms of the GNU Lesser General Public |
|||
License as published by the Free Software Foundation; either |
|||
version 2.1 of the License, or (at your option) any later version. |
|||
|
|||
The GNU C Library is distributed in the hope that it will be useful, |
|||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
Lesser General Public License for more details. |
|||
|
|||
You should have received a copy of the GNU Lesser General Public |
|||
License along with the GNU C Library; if not, see |
|||
<http://www.gnu.org/licenses/>. */
|
|||
|
|||
|
|||
#include <stdlib.h> |
|||
#include <ctype.h> |
|||
#include <locale.h> |
|||
#include <stdbool.h> |
|||
#include <string.h> |
|||
#include <sys/stat.h> |
|||
#include "gconv_int.h" |
|||
#include "gconv_charset.h" |
|||
|
|||
|
|||
/* Locate the last option suffix in the conversion code S.

   A suffix starts at a '/' or ',' separator and extends to the end of the
   string; because the *last* separator is reported, the suffix text itself
   contains neither character.  A conversion code is a triplet whose
   components are separated by '/', and only the third component can carry
   suffixes, so a suffix is reported only when S contains at least two
   slashes.

   Returns a pointer to the separator that starts the last suffix, or NULL
   if there is none.  S is not modified; the caller is expected to consume
   the suffix and cut it off (e.g. by truncating the string at the returned
   position) before calling again.  */
static char *
find_suffix (char *s)
{
  char *last_separator = NULL;
  int slashes = 0;

  for (char *p = s; *p != '\0'; p++)
    {
      if (*p == '/')
        slashes++;
      else if (*p != ',')
        continue;

      /* Reached for both '/' and ','.  */
      last_separator = p;
    }

  return slashes >= 2 ? last_separator : NULL;
}
|||
|
|||
|
|||
/* Working state for parsing one iconv_open charset argument.  */
struct gconv_parsed_code
{
  /* Writable, NUL-terminated conversion code.  gconv_parse_code truncates
     it in place as suffixes are consumed.  */
  char *code;
  /* Set when a TRANSLIT suffix was recognized in CODE.  */
  bool translit;
  /* Set when an IGNORE suffix was recognized in CODE.  */
  bool ignore;
};
|||
|
|||
|
|||
/* This function parses an iconv_open encoding PC.CODE, strips any suffixes
|
|||
(such as TRANSLIT or IGNORE) from it and sets corresponding flags in it. */ |
|||
static void |
|||
gconv_parse_code (struct gconv_parsed_code *pc) |
|||
{ |
|||
pc->translit = false; |
|||
pc->ignore = false; |
|||
|
|||
while (1) |
|||
{ |
|||
/* First drop any trailing whitespaces and separators. */ |
|||
size_t len = strlen (pc->code); |
|||
while ((len > 0) |
|||
&& (isspace (pc->code[len - 1]) |
|||
|| pc->code[len - 1] == ',' |
|||
|| pc->code[len - 1] == '/')) |
|||
len--; |
|||
|
|||
pc->code[len] = '\0'; |
|||
|
|||
if (len == 0) |
|||
return; |
|||
|
|||
char * suffix = find_suffix (pc->code); |
|||
if (suffix == NULL) |
|||
{ |
|||
/* At this point, we have processed and removed all suffixes from the
|
|||
code and what remains of the code is suffix free. */ |
|||
return; |
|||
} |
|||
else |
|||
{ |
|||
/* A suffix is processed from the end of the code array going
|
|||
backwards, one suffix at a time. The suffix is an index into the |
|||
code character array and points to: one past the end of the code |
|||
and any unprocessed suffixes, and to the beginning of the suffix |
|||
currently being processed during this iteration. We must process |
|||
this suffix and then drop it from the code by terminating the |
|||
preceding text with NULL. |
|||
|
|||
We want to allow and recognize suffixes such as: |
|||
|
|||
"/TRANSLIT" i.e. single suffix |
|||
"//TRANSLIT" i.e. single suffix and multiple separators |
|||
"//TRANSLIT/IGNORE" i.e. suffixes separated by "/" |
|||
"/TRANSLIT//IGNORE" i.e. suffixes separated by "//" |
|||
"//IGNORE,TRANSLIT" i.e. suffixes separated by "," |
|||
"//IGNORE," i.e. trailing "," |
|||
"//TRANSLIT/" i.e. trailing "/" |
|||
"//TRANSLIT//" i.e. trailing "//" |
|||
"/" i.e. empty suffix. |
|||
|
|||
Unknown suffixes are silently discarded and ignored. */ |
|||
|
|||
if ((__strcasecmp_l (suffix, |
|||
GCONV_TRIPLE_SEPARATOR |
|||
GCONV_TRANSLIT_SUFFIX, |
|||
_nl_C_locobj_ptr) == 0) |
|||
|| (__strcasecmp_l (suffix, |
|||
GCONV_SUFFIX_SEPARATOR |
|||
GCONV_TRANSLIT_SUFFIX, |
|||
_nl_C_locobj_ptr) == 0)) |
|||
pc->translit = true; |
|||
|
|||
if ((__strcasecmp_l (suffix, |
|||
GCONV_TRIPLE_SEPARATOR |
|||
GCONV_IGNORE_ERRORS_SUFFIX, |
|||
_nl_C_locobj_ptr) == 0) |
|||
|| (__strcasecmp_l (suffix, |
|||
GCONV_SUFFIX_SEPARATOR |
|||
GCONV_IGNORE_ERRORS_SUFFIX, |
|||
_nl_C_locobj_ptr) == 0)) |
|||
pc->ignore = true; |
|||
|
|||
/* We just processed this suffix. We can now drop it from the
|
|||
code string by truncating it at the suffix's position. */ |
|||
suffix[0] = '\0'; |
|||
} |
|||
} |
|||
} |
|||
|
|||
|
|||
/* This function accepts the charset names of the source and destination of the
|
|||
conversion and populates *conv_spec with an equivalent conversion |
|||
specification that may later be used by __gconv_open. The charset names |
|||
might contain options in the form of suffixes that alter the conversion, |
|||
e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring |
|||
and truncating any suffix options in fromcode, and processing and truncating |
|||
any suffix options in tocode. Supported suffix options ("TRANSLIT" or |
|||
"IGNORE") when found in tocode lead to the corresponding flag in *conv_spec |
|||
to be set to true. Unrecognized suffix options are silently discarded. If |
|||
the function succeeds, it returns conv_spec back to the caller. It returns |
|||
NULL upon failure. conv_spec must be allocated and freed by the caller. */ |
|||
struct gconv_spec * |
|||
__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, |
|||
const char *tocode) |
|||
{ |
|||
struct gconv_parsed_code pfc, ptc; |
|||
struct gconv_spec *ret = NULL; |
|||
|
|||
pfc.code = __strdup (fromcode); |
|||
ptc.code = __strdup (tocode); |
|||
|
|||
if ((pfc.code == NULL) |
|||
|| (ptc.code == NULL)) |
|||
goto out; |
|||
|
|||
gconv_parse_code (&pfc); |
|||
gconv_parse_code (&ptc); |
|||
|
|||
/* We ignore suffixes in the fromcode because that is how the current
|
|||
implementation has always handled them. Only suffixes in the tocode are |
|||
processed and handled. The reality is that invalid input in the input |
|||
character set should only be ignored if the fromcode specifies IGNORE. |
|||
The current implementation ignores invalid intput in the input character |
|||
set if the tocode contains IGNORE. We preserve this behavior for |
|||
backwards compatibility. In the future we may split the handling of |
|||
IGNORE to allow a finer grained specification of ignorning invalid input |
|||
and/or ignoring invalid output. */ |
|||
conv_spec->translit = ptc.translit; |
|||
conv_spec->ignore = ptc.ignore; |
|||
|
|||
/* 3 extra bytes because 1 extra for '\0', and 2 extra so strip might
|
|||
be able to add one or two trailing '/' characters if necessary. */ |
|||
conv_spec->fromcode = malloc (strlen (fromcode) + 3); |
|||
if (conv_spec->fromcode == NULL) |
|||
goto out; |
|||
|
|||
conv_spec->tocode = malloc (strlen (tocode) + 3); |
|||
if (conv_spec->tocode == NULL) |
|||
{ |
|||
free (conv_spec->fromcode); |
|||
conv_spec->fromcode = NULL; |
|||
goto out; |
|||
} |
|||
|
|||
/* Strip unrecognized characters and ensure that the code has two '/'
|
|||
characters as per conversion code triplet specification. */ |
|||
strip (conv_spec->fromcode, pfc.code); |
|||
strip (conv_spec->tocode, ptc.code); |
|||
ret = conv_spec; |
|||
|
|||
out: |
|||
free (pfc.code); |
|||
free (ptc.code); |
|||
|
|||
return ret; |
|||
} |
|||
libc_hidden_def (__gconv_create_spec) |
|||
@ -0,0 +1,347 @@ |
|||
/* Test iconv's TRANSLIT and IGNORE option handling
|
|||
|
|||
Copyright (C) 2020 Free Software Foundation, Inc. |
|||
This file is part of the GNU C Library. |
|||
|
|||
The GNU C Library is free software; you can redistribute it and/or |
|||
modify it under the terms of the GNU Lesser General Public |
|||
License as published by the Free Software Foundation; either |
|||
version 2.1 of the License, or (at your option) any later version. |
|||
|
|||
The GNU C Library is distributed in the hope that it will be useful, |
|||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
Lesser General Public License for more details. |
|||
|
|||
You should have received a copy of the GNU Lesser General Public |
|||
License along with the GNU C Library; if not, see |
|||
<https://www.gnu.org/licenses/>. */
|
|||
|
|||
|
|||
#include <iconv.h> |
|||
#include <locale.h> |
|||
#include <errno.h> |
|||
#include <string.h> |
|||
#include <support/support.h> |
|||
#include <support/check.h> |
|||
|
|||
|
|||
/* Run one iconv test.  Arguments:
   to: destination character set and options
   from: source character set
   input: input string to be converted
   exp_in: expected number of bytes consumed
   exp_ret: expected return value (error or number of irreversible conversions)
   exp_out: expected output string
   exp_err: expected value of `errno' after iconv returns.

   If iconv_open fails, the failure is recorded and the remaining checks are
   skipped, because there is no valid conversion descriptor to use.  */
static void
test_iconv (const char *to, const char *from, char *input, size_t exp_in,
            size_t exp_ret, const char *exp_out, int exp_err)
{
  iconv_t cd;
  char outbuf[500];
  size_t inlen, outlen;
  char *inptr, *outptr;
  size_t n;

  cd = iconv_open (to, from);
  TEST_VERIFY (cd != (iconv_t) -1);
  /* Never hand an invalid descriptor to iconv or iconv_close.  */
  if (cd == (iconv_t) -1)
    return;

  inlen = strlen (input);
  outlen = sizeof (outbuf);
  inptr = input;
  outptr = outbuf;

  /* Clear errno so that the comparison below only sees what iconv set.  */
  errno = 0;
  n = iconv (cd, &inptr, &inlen, &outptr, &outlen);

  TEST_COMPARE (n, exp_ret);
  TEST_VERIFY (inptr == input + exp_in);
  TEST_COMPARE (errno, exp_err);
  TEST_COMPARE_BLOB (outbuf, outptr - outbuf, exp_out, strlen (exp_out));
  TEST_VERIFY (iconv_close (cd) == 0);
}
|||
|
|||
|
|||
/* We test option parsing by converting UTF-8 inputs to ASCII under various
   option combinations.  The UTF-8 inputs fall into three categories:
   - ASCII-only,
   - non-ASCII,
   - non-ASCII with invalid UTF-8 characters.  */

/* 1.  */
char ascii[] = "Just some ASCII text";

/* 2. Valid UTF-8 input and some corresponding expected outputs with various
   options.  The two non-ASCII characters below are accented alphabets:
   an `a' then an `o'.  */
char utf8[] = "UTF-8 text with \u00E1 couple \u00F3f non-ASCII characters";
char u2a[] = "UTF-8 text with ";
char u2a_translit[] = "UTF-8 text with a couple of non-ASCII characters";
/* Dropping the accented `a' leaves the spaces on both sides of it in place,
   hence the two consecutive spaces after "with".  */
char u2a_ignore[] = "UTF-8 text with  couple f non-ASCII characters";

/* 3. Invalid UTF-8 input and some corresponding expected outputs.  \xff is
   invalid UTF-8.  It's followed by some valid but non-ASCII UTF-8.  */
char iutf8[] = "Invalid UTF-8 \xff\u27E6text\u27E7";
char iu2a[] = "Invalid UTF-8 ";
char iu2a_ignore[] = "Invalid UTF-8 text";
char iu2a_both[] = "Invalid UTF-8 [|text|]";

/* 4. Another invalid UTF-8 input and corresponding expected outputs.  This
   time the valid non-ASCII UTF-8 characters appear before the invalid
   \xff.  */
char jutf8[] = "Invalid \u27E6UTF-8\u27E7 \xfftext";
char ju2a[] = "Invalid ";
char ju2a_translit[] = "Invalid [|UTF-8|] ";
char ju2a_ignore[] = "Invalid UTF-8 text";
char ju2a_both[] = "Invalid [|UTF-8|] text";

/* We also test option handling for character set names that have the form
   "A/B".  In this test, we test conversions "ISO-10646/UTF-8", and either
   ISO-8859-1 or ASCII.  */

/* 5. Accented 'A' and 'a' characters in ISO-8859-1 and UTF-8, and an
   equivalent ASCII transliteration.  */
char iso8859_1_a[] = {0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* Accented A's.  */
                      0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* Accented a's.  */
                      0x00};
char utf8_a[] = "\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5"
                "\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5";
char ascii_a[] = "AAAAAAaaaaaa";

/* 6. An invalid ASCII string where [0] is invalid and [1] is '~'.  */
char iascii [] = {0x80, '~', '\0'};
char empty[] = "";
char ia2u_ignore[] = "~";
|||
|
|||
static int |
|||
do_test (void) |
|||
{ |
|||
xsetlocale (LC_ALL, "en_US.UTF-8"); |
|||
|
|||
|
|||
/* 0. iconv_open should gracefully fail for invalid character sets. */ |
|||
|
|||
TEST_VERIFY (iconv_open ("INVALID", "UTF-8") == (iconv_t) -1); |
|||
TEST_VERIFY (iconv_open ("UTF-8", "INVALID") == (iconv_t) -1); |
|||
TEST_VERIFY (iconv_open ("INVALID", "INVALID") == (iconv_t) -1); |
|||
|
|||
|
|||
/* 1. ASCII-only UTF-8 input should convert to ASCII with no changes: */ |
|||
|
|||
test_iconv ("ASCII", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); |
|||
test_iconv ("ASCII//", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); |
|||
test_iconv ("ASCII//TRANSLIT", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); |
|||
test_iconv ("ASCII//TRANSLIT//", "UTF-8", ascii, strlen (ascii), 0, ascii, |
|||
0); |
|||
test_iconv ("ASCII//IGNORE", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); |
|||
test_iconv ("ASCII//IGNORE//", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); |
|||
|
|||
|
|||
/* 2. Valid UTF-8 input with non-ASCII characters: */ |
|||
|
|||
/* EILSEQ when converted to ASCII. */ |
|||
test_iconv ("ASCII", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, EILSEQ); |
|||
|
|||
/* Converted without error with TRANSLIT enabled. */ |
|||
test_iconv ("ASCII//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, u2a_translit, |
|||
0); |
|||
|
|||
/* EILSEQ with IGNORE enabled. Non-ASCII chars dropped from output. */ |
|||
test_iconv ("ASCII//IGNORE", "UTF-8", utf8, strlen (utf8), (size_t) -1, |
|||
u2a_ignore, EILSEQ); |
|||
|
|||
/* With TRANSLIT and IGNORE enabled, transliterated without error. We test
|
|||
four combinations. */ |
|||
|
|||
test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", utf8, strlen (utf8), 2, |
|||
u2a_translit, 0); |
|||
test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", utf8, strlen (utf8), 2, |
|||
u2a_translit, 0); |
|||
test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, |
|||
u2a_translit, 0); |
|||
/* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ |
|||
test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, |
|||
u2a_translit, 0); |
|||
|
|||
/* Misspellings of TRANSLIT and IGNORE are ignored, but conversion still
|
|||
works while respecting any other correctly spelled options. */ |
|||
|
|||
test_iconv ("ASCII//T", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, |
|||
EILSEQ); |
|||
test_iconv ("ASCII//TRANSLITERATE", "UTF-8", utf8, strlen (u2a), (size_t) -1, |
|||
u2a, EILSEQ); |
|||
test_iconv ("ASCII//I", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, |
|||
EILSEQ); |
|||
test_iconv ("ASCII//IGNORED", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, |
|||
EILSEQ); |
|||
test_iconv ("ASCII//TRANSLITERATE//IGNORED", "UTF-8", utf8, strlen (u2a), |
|||
(size_t) -1, u2a, EILSEQ); |
|||
test_iconv ("ASCII//IGNORED,TRANSLITERATE", "UTF-8", utf8, strlen (u2a), |
|||
(size_t) -1, u2a, EILSEQ); |
|||
test_iconv ("ASCII//T//I", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, |
|||
EILSEQ); |
|||
|
|||
test_iconv ("ASCII//TRANSLIT//I", "UTF-8", utf8, strlen (utf8), 2, |
|||
u2a_translit, 0); |
|||
/* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ |
|||
test_iconv ("ASCII//I//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, |
|||
u2a_translit, 0); |
|||
test_iconv ("ASCII//IGNORED,TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, |
|||
u2a_translit, 0); |
|||
test_iconv ("ASCII//TRANSLIT,IGNORED", "UTF-8", utf8, strlen (utf8), 2, |
|||
u2a_translit, 0); |
|||
|
|||
test_iconv ("ASCII//IGNORE,T", "UTF-8", utf8, strlen (utf8), (size_t) -1, |
|||
u2a_ignore, EILSEQ); |
|||
test_iconv ("ASCII//T,IGNORE", "UTF-8", utf8, strlen (utf8), (size_t) -1, |
|||
u2a_ignore, EILSEQ); |
|||
/* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ |
|||
test_iconv ("ASCII//TRANSLITERATE//IGNORE", "UTF-8", utf8, strlen (utf8), |
|||
(size_t) -1, u2a_ignore, EILSEQ); |
|||
test_iconv ("ASCII//IGNORE//TRANSLITERATE", "UTF-8", utf8, strlen (utf8), |
|||
(size_t) -1, u2a_ignore, EILSEQ); |
|||
|
|||
|
|||
/* 3. Invalid UTF-8 followed by some valid non-ASCII UTF-8 characters: */ |
|||
|
|||
/* EILSEQ; output is truncated at the first invalid UTF-8 character. */ |
|||
test_iconv ("ASCII", "UTF-8", iutf8, strlen (iu2a), (size_t) -1, iu2a, |
|||
EILSEQ); |
|||
|
|||
/* With TRANSLIT enabled: EILSEQ; output still truncated at the first invalid
|
|||
UTF-8 character. */ |
|||
test_iconv ("ASCII//TRANSLIT", "UTF-8", iutf8, strlen (iu2a), (size_t) -1, |
|||
iu2a, EILSEQ); |
|||
|
|||
/* With IGNORE enabled: EILSEQ; output omits invalid UTF-8 characters and
|
|||
valid UTF-8 non-ASCII characters. */ |
|||
test_iconv ("ASCII//IGNORE", "UTF-8", iutf8, strlen (iutf8), (size_t) -1, |
|||
iu2a_ignore, EILSEQ); |
|||
|
|||
/* With TRANSLIT and IGNORE enabled, output omits only invalid UTF-8
|
|||
characters and transliterates valid non-ASCII UTF-8 characters. We test |
|||
four combinations. */ |
|||
|
|||
test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", iutf8, strlen (iutf8), 2, |
|||
iu2a_both, 0); |
|||
/* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ |
|||
test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", iutf8, strlen (iutf8), 2, |
|||
iu2a_both, 0); |
|||
test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", iutf8, strlen (iutf8), 2, |
|||
iu2a_both, 0); |
|||
/* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ |
|||
test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", iutf8, strlen (iutf8), 2, |
|||
iu2a_both, 0); |
|||
|
|||
|
|||
/* 4. Invalid UTF-8 with valid non-ASCII UTF-8 chars appearing first: */ |
|||
|
|||
/* EILSEQ; output is truncated at the first non-ASCII character. */ |
|||
test_iconv ("ASCII", "UTF-8", jutf8, strlen (ju2a), (size_t) -1, ju2a, |
|||
EILSEQ); |
|||
|
|||
/* With TRANSLIT enabled: EILSEQ; output now truncated at the first invalid
|
|||
UTF-8 character. */ |
|||
test_iconv ("ASCII//TRANSLIT", "UTF-8", jutf8, strlen (jutf8) - 5, |
|||
(size_t) -1, ju2a_translit, EILSEQ); |
|||
test_iconv ("ASCII//translit", "UTF-8", jutf8, strlen (jutf8) - 5, |
|||
(size_t) -1, ju2a_translit, EILSEQ); |
|||
|
|||
/* With IGNORE enabled: EILSEQ; output omits invalid UTF-8 characters and
|
|||
valid UTF-8 non-ASCII characters. */ |
|||
test_iconv ("ASCII//IGNORE", "UTF-8", jutf8, strlen (jutf8), (size_t) -1, |
|||
ju2a_ignore, EILSEQ); |
|||
test_iconv ("ASCII//ignore", "UTF-8", jutf8, strlen (jutf8), (size_t) -1, |
|||
ju2a_ignore, EILSEQ); |
|||
|
|||
/* With TRANSLIT and IGNORE enabled, output omits only invalid UTF-8
|
|||
characters and transliterates valid non-ASCII UTF-8 characters. We test |
|||
several combinations. */ |
|||
|
|||
test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", jutf8, strlen (jutf8), 2, |
|||
ju2a_both, 0); |
|||
/* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ |
|||
test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", jutf8, strlen (jutf8), 2, |
|||
ju2a_both, 0); |
|||
test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", jutf8, strlen (jutf8), 2, |
|||
ju2a_both, 0); |
|||
/* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ |
|||
test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", jutf8, strlen (jutf8), 2, |
|||
ju2a_both, 0); |
|||
test_iconv ("ASCII//translit,ignore", "UTF-8", jutf8, strlen (jutf8), 2, |
|||
ju2a_both, 0); |
|||
/* Trailing whitespace and separators should be ignored. */ |
|||
test_iconv ("ASCII//IGNORE,TRANSLIT ", "UTF-8", jutf8, strlen (jutf8), 2, |
|||
ju2a_both, 0); |
|||
test_iconv ("ASCII//IGNORE,TRANSLIT/", "UTF-8", jutf8, strlen (jutf8), 2, |
|||
ju2a_both, 0); |
|||
test_iconv ("ASCII//IGNORE,TRANSLIT//", "UTF-8", jutf8, strlen (jutf8), 2, |
|||
ju2a_both, 0); |
|||
test_iconv ("ASCII//IGNORE,TRANSLIT,", "UTF-8", jutf8, strlen (jutf8), 2, |
|||
ju2a_both, 0); |
|||
test_iconv ("ASCII//IGNORE,TRANSLIT,,", "UTF-8", jutf8, strlen (jutf8), 2, |
|||
ju2a_both, 0); |
|||
test_iconv ("ASCII//IGNORE,TRANSLIT /,", "UTF-8", jutf8, strlen (jutf8), 2, |
|||
ju2a_both, 0); |
|||
|
|||
/* TRANSLIT or IGNORE suffixes in fromcode should be ignored. */ |
|||
test_iconv ("ASCII", "UTF-8//TRANSLIT", jutf8, strlen (ju2a), (size_t) -1, |
|||
ju2a, EILSEQ); |
|||
test_iconv ("ASCII", "UTF-8//IGNORE", jutf8, strlen (ju2a), (size_t) -1, |
|||
ju2a, EILSEQ); |
|||
test_iconv ("ASCII", "UTF-8//TRANSLIT,IGNORE", jutf8, strlen (ju2a), |
|||
(size_t) -1, ju2a, EILSEQ); |
|||
|
|||
|
|||
/* 5. Charset names of the form "A/B/": */ |
|||
|
|||
/* ISO-8859-1 is converted to UTF-8 without needing transliteration. */ |
|||
test_iconv ("ISO-10646/UTF-8", "ISO-8859-1", iso8859_1_a, |
|||
strlen (iso8859_1_a), 0, utf8_a, 0); |
|||
test_iconv ("ISO-10646/UTF-8/", "ISO-8859-1", iso8859_1_a, |
|||
strlen (iso8859_1_a), 0, utf8_a, 0); |
|||
test_iconv ("ISO-10646/UTF-8/IGNORE", "ISO-8859-1", iso8859_1_a, |
|||
strlen (iso8859_1_a), 0, utf8_a, 0); |
|||
test_iconv ("ISO-10646/UTF-8//IGNORE", "ISO-8859-1", iso8859_1_a, |
|||
strlen (iso8859_1_a), 0, utf8_a, 0); |
|||
test_iconv ("ISO-10646/UTF-8/TRANSLIT", "ISO-8859-1", iso8859_1_a, |
|||
strlen (iso8859_1_a), 0, utf8_a, 0); |
|||
test_iconv ("ISO-10646/UTF-8//TRANSLIT", "ISO-8859-1", iso8859_1_a, |
|||
strlen (iso8859_1_a), 0, utf8_a, 0); |
|||
test_iconv ("ISO-10646/UTF-8//TRANSLIT/IGNORE", "ISO-8859-1", iso8859_1_a, |
|||
strlen (iso8859_1_a), 0, utf8_a, 0); |
|||
test_iconv ("ISO-10646/UTF-8//TRANSLIT//IGNORE", "ISO-8859-1", iso8859_1_a, |
|||
strlen (iso8859_1_a), 0, utf8_a, 0); |
|||
test_iconv ("ISO-10646/UTF-8/TRANSLIT,IGNORE", "ISO-8859-1", iso8859_1_a, |
|||
strlen (iso8859_1_a), 0, utf8_a, 0); |
|||
|
|||
/* UTF-8 with accented A's is converted to ASCII with transliteration. */ |
|||
test_iconv ("ASCII", "ISO-10646/UTF-8", utf8_a, |
|||
0, (size_t) -1, empty, EILSEQ); |
|||
test_iconv ("ASCII//IGNORE", "ISO-10646/UTF-8", utf8_a, |
|||
strlen (utf8_a), (size_t) -1, empty, EILSEQ); |
|||
test_iconv ("ASCII//TRANSLIT", "ISO-10646/UTF-8", utf8_a, |
|||
strlen (utf8_a), 12, ascii_a, 0); |
|||
|
|||
/* Invalid ASCII is converted to UTF-8 only with IGNORE. */ |
|||
test_iconv ("ISO-10646/UTF-8", "ASCII", iascii, strlen (empty), (size_t) -1, |
|||
empty, EILSEQ); |
|||
test_iconv ("ISO-10646/UTF-8/TRANSLIT", "ASCII", iascii, strlen (empty), |
|||
(size_t) -1, empty, EILSEQ); |
|||
test_iconv ("ISO-10646/UTF-8/IGNORE", "ASCII", iascii, strlen (iascii), |
|||
(size_t) -1, ia2u_ignore, EILSEQ); |
|||
test_iconv ("ISO-10646/UTF-8/TRANSLIT,IGNORE", "ASCII", iascii, |
|||
strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); |
|||
/* Due to bug 19519, iconv was ignoring IGNORE for the following three
|
|||
inputs: */ |
|||
test_iconv ("ISO-10646/UTF-8/TRANSLIT/IGNORE", "ASCII", iascii, |
|||
strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); |
|||
test_iconv ("ISO-10646/UTF-8//TRANSLIT,IGNORE", "ASCII", iascii, |
|||
strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); |
|||
test_iconv ("ISO-10646/UTF-8//TRANSLIT//IGNORE", "ASCII", iascii, |
|||
strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); |
|||
|
|||
return 0; |
|||
} |
|||
|
|||
#include <support/test-driver.c> |
|||
@ -0,0 +1,280 @@ |
|||
#!/bin/bash |
|||
# Test for some known iconv(1) hangs from bug 19519, and miscellaneous |
|||
# iconv(1) program error conditions. |
|||
# Copyright (C) 2020 Free Software Foundation, Inc. |
|||
# This file is part of the GNU C Library. |
|||
|
|||
# The GNU C Library is free software; you can redistribute it and/or |
|||
# modify it under the terms of the GNU Lesser General Public |
|||
# License as published by the Free Software Foundation; either |
|||
# version 2.1 of the License, or (at your option) any later version. |
|||
|
|||
# The GNU C Library is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
# Lesser General Public License for more details. |
|||
|
|||
# You should have received a copy of the GNU Lesser General Public |
|||
# License along with the GNU C Library; if not, see |
|||
# <https://www.gnu.org/licenses/>. |
|||
|
|||
codir=$1 |
|||
test_wrapper_env="$2" |
|||
run_program_env="$3" |
|||
|
|||
# We have to have some directories in the library path. |
|||
LIBPATH=$codir:$codir/iconvdata |
|||
|
|||
# How to start the iconv(1) program.  $from is not defined/expanded yet.
|||
ICONV=' |
|||
$codir/elf/ld.so --library-path $LIBPATH --inhibit-rpath ${from}.so |
|||
$codir/iconv/iconv_prog |
|||
' |
|||
ICONV="$test_wrapper_env $run_program_env $ICONV" |
|||
|
|||
# List of known hangs; |
|||
# Gathered by running an exhaustive 2 byte input search against glibc-2.28 |
|||
hangarray=( |
|||
"\x00\x23;-c;ANSI_X3.110;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xa1;-c;ARMSCII-8;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xa1;-c;ASMO_449;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x81;-c;BIG5;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xff;-c;BIG5HKSCS;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xff;-c;BRF;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xff;-c;BS_4730;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x81;-c;CP1250;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x98;-c;CP1251;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x81;-c;CP1252;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x81;-c;CP1253;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x81;-c;CP1254;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x81;-c;CP1255;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x81;-c;CP1257;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x81;-c;CP1258;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;CP932;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;CSA_Z243.4-1985-1;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;CSA_Z243.4-1985-2;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;DEC-MCS;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;DIN_66003;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;DS_2089;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-AT-DE;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-AT-DE-A;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-CA-FR;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-DK-NO;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-DK-NO-A;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-ES;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-ES-A;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-ES-S;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-FI-SE;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-FI-SE-A;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-FR;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-IS-FRISS;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-IT;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-PT;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-UK;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;EBCDIC-US;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ES;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ES2;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;EUC-CN;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;EUC-JISX0213;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;EUC-JP;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;EUC-JP-MS;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;EUC-KR;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;EUC-TW;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;GB18030;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;GB_1988-80;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;GBK;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;GOST_19768-74;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;GREEK7;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;GREEK7-OLD;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;GREEK-CCITT;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;HP-GREEK8;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;HP-ROMAN8;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;HP-ROMAN9;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;HP-THAI8;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;HP-TURKISH8;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;IBM038;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;IBM1004;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xff;-c;IBM1008;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;IBM1046;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x51;-c;IBM1132;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xa0;-c;IBM1133;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xce;-c;IBM1137;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;IBM1161;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xdb;-c;IBM1162;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x70;-c;IBM12712;UTF-8//TRANSLIT//IGNORE" |
|||
# These are known hangs that are yet to be fixed: |
|||
# "\x00\x0f;-c;IBM1364;UTF-8" |
|||
# "\x00\x0f;-c;IBM1371;UTF-8" |
|||
# "\x00\x0f;-c;IBM1388;UTF-8" |
|||
# "\x00\x0f;-c;IBM1390;UTF-8" |
|||
# "\x00\x0f;-c;IBM1399;UTF-8" |
|||
"\x00\x53;-c;IBM16804;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;IBM274;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;IBM275;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;IBM281;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x57;-c;IBM290;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x45;-c;IBM420;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x68;-c;IBM423;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x70;-c;IBM424;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x53;-c;IBM4517;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x53;-c;IBM4899;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xa5;-c;IBM4909;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xdc;-c;IBM4971;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;IBM803;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x91;-c;IBM851;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x9b;-c;IBM856;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xd5;-c;IBM857;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;IBM864;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x94;-c;IBM868;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x94;-c;IBM869;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;IBM874;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x6a;-c;IBM875;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;IBM880;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;IBM891;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;IBM903;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;IBM904;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;IBM905;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;IBM9066;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x48;-c;IBM918;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x57;-c;IBM930;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;IBM932;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;IBM933;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;IBM935;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;IBM937;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x41;-c;IBM939;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;IBM943;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;INIS;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;INIS-8;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;INIS-CYRILLIC;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xec;-c;ISIRI-3342;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xec;-c;ISO_10367-BOX;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO-2022-CN;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO-2022-CN-EXT;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO-2022-JP;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO-2022-JP-2;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO-2022-JP-3;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO-2022-KR;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO_2033;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO_5427;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO_5427-EXT;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO_5428;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xa4;-c;ISO_6937;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xa0;-c;ISO_6937-2;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO-8859-11;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xa5;-c;ISO-8859-3;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO-8859-6;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO-8859-7;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;ISO-8859-8;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;ISO-IR-197;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;ISO-IR-209;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;IT;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;JIS_C6220-1969-RO;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;JIS_C6229-1984-B;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;JOHAB;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;JUS_I.B1.002;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;KOI-8;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x88;-c;KOI8-T;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;KSC5636;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;LATIN-GREEK;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;LATIN-GREEK-1;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xf6;-c;MAC-IS;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;MSZ_7795.3;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;NATS-DANO;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;NATS-SEFI;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;NC_NC00-10;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;NF_Z_62-010;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;NF_Z_62-010_1973;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;NS_4551-1;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;NS_4551-2;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;PT;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;PT2;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x98;-c;RK1048;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x98;-c;SEN_850200_B;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x98;-c;SEN_850200_C;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;Shift_JISX0213;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x80;-c;SJIS;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x23;-c;T.61-8BIT;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;TIS-620;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;TSCII;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;UHC;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xd8;-c;UNICODE;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xdc;-c;UTF-16;UTF-8//TRANSLIT//IGNORE" |
|||
"\xdc\x00;-c;UTF-16BE;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\xdc;-c;UTF-16LE;UTF-8//TRANSLIT//IGNORE" |
|||
"\xff\xff;-c;UTF-7;UTF-8//TRANSLIT//IGNORE" |
|||
"\x00\x81;-c;WIN-SAMI-2;UTF-8//TRANSLIT//IGNORE" |
|||
) |
|||
|
|||
# Option combinations that are *expected* to make iconv fail.
# Each entry is laid out as "<input bytes>;<flags>;<from>;<to>".
errorarray=()

# An unknown character set on either side of the conversion (or on both)
# has to be rejected with an error.
errorarray+=("\x00\x00;;INVALID;INVALID")
errorarray+=("\x00\x00;;INVALID;UTF-8")
errorarray+=("\x00\x00;;UTF-8;INVALID")
|||
|
|||
# Run iconv once on a short input, guarded by timeout(1).
#
# Inputs (global variables):
#   ICONV    command used to launch iconv; it is passed through eval so
#            that variable references embedded in its value are expanded
#   twobyte  input bytes, written as backslash escapes for "echo -e"
#   c        optional iconv flag(s); may be empty
#   from     source conversion specification (argument of -f)
#   to       target conversion specification (argument of -t)
# Output (global variable):
#   ret      exit status of the timeout/iconv pipeline
execute_test ()
{
  eval PROG=\"$ICONV\"
  # $PROG and $c stay unquoted on purpose: an empty $c must vanish from the
  # command line, and $PROG presumably holds a command plus its arguments.
  # $from and $to are quoted so each is always passed as exactly one word.
  # timeout sends TERM after 3 seconds and KILL 4 seconds after that.
  echo -en "$twobyte" \
    | timeout -k 4 3 $PROG $c -f "$from" -t "$to" &>/dev/null
  ret=$?
}
|||
|
|||
# Classify the exit status in $ret for a hang test, print a one-line report
# and abort the script with status 1 unless the verdict is "OK".
# Reads $ret, $from, $to, $twobyte and $c; sets $result.
check_hangtest_result ()
{
  case "$ret" in
    124|137)
      # timeout/hang
      result="HANG"
      ;;
    139)
      # segfault
      result="SEGFAULT"
      ;;
    *)
      if [ "$ret" -gt "127" ]; then
        # unexpected error
        result="UNEXPECTED"
      else
        result="OK"
      fi
      ;;
  esac

  echo -n "$result: from: \"$from\", to: \"$to\","
  echo " input \"$twobyte\", flags \"$c\""

  # Anything other than a clean verdict fails the whole test run.
  [ "$result" = "OK" ] || exit 1
}
|||
|
|||
# Run every hang-test entry: split it on ';' into the global variables that
# execute_test reads, run iconv, then classify the exit status.
for hangcommand in "${hangarray[@]}"; do
  twobyte=$(cut -d ';' -f 1 <<< "$hangcommand")
  c=$(cut -d ';' -f 2 <<< "$hangcommand")
  from=$(cut -d ';' -f 3 <<< "$hangcommand")
  to=$(cut -d ';' -f 4 <<< "$hangcommand")

  execute_test
  check_hangtest_result
done
|||
|
|||
# Verdict for a test that is supposed to fail: iconv must have exited with
# status 1.  Prints a one-line report and aborts the script with status 1
# on any other outcome.
# Reads $ret, $from, $to, $twobyte and $c; sets $result.
check_errtest_result ()
{
  result="FAIL"
  if [ "$ret" -eq "1" ]; then
    # we errored out as expected
    result="PASS"
  fi

  echo -n "$result: from: \"$from\", to: \"$to\","
  echo " input \"$twobyte\", flags \"$c\", return code $ret"

  [ "$result" = "PASS" ] || exit 1
}
|||
|
|||
# Run every entry of errorarray and require each one to fail.
# An entry has exactly four ';'-separated fields (input, flags, from, to);
# empty fields are kept because ';' is a non-whitespace IFS character.
for errorcommand in "${errorarray[@]}"; do
  IFS=';' read -r twobyte c from to <<< "$errorcommand"

  execute_test
  check_errtest_result
done
|||
Loading…
Reference in new issue