<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Polyfill\Iconv;

/**
 * iconv implementation in pure PHP, UTF-8 centric.
 *
 * Implemented:
 * - iconv              - Convert string to requested character encoding
 * - iconv_mime_decode  - Decodes a MIME header field
 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
 * - iconv_get_encoding - Retrieve internal configuration variables of iconv extension
 * - iconv_set_encoding - Set current setting for character encoding conversion
 * - iconv_mime_encode  - Composes a MIME header field
 * - iconv_strlen       - Returns the character count of string
 * - iconv_strpos       - Finds position of first occurrence of a needle within a haystack
 * - iconv_strrpos      - Finds the last occurrence of a needle within a haystack
 * - iconv_substr       - Cut out part of a string
 *
 * Charsets available for conversion are defined by files
 * in the charset/ directory and by Iconv::$alias below.
 * You're welcome to send back any addition you make.
 *
 * @author Nicolas Grekas <p@tchwork.com>
 *
 * @internal
 */
final class Iconv
{
    public const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
    public const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';

    public static $inputEncoding = 'utf-8';
    public static $outputEncoding = 'utf-8';
    public static $internalEncoding = 'utf-8';

    /** Lower-cased charset aliases mapped to the canonical name used to look up charset maps. */
    private static $alias = [
        'utf8' => 'utf-8',
        'ascii' => 'us-ascii',
        'tis-620' => 'iso-8859-11',
        'cp1250' => 'windows-1250',
        'cp1251' => 'windows-1251',
        'cp1252' => 'windows-1252',
        'cp1253' => 'windows-1253',
        'cp1254' => 'windows-1254',
        'cp1255' => 'windows-1255',
        'cp1256' => 'windows-1256',
        'cp1257' => 'windows-1257',
        'cp1258' => 'windows-1258',
        'shift-jis' => 'cp932',
        'shift_jis' => 'cp932',
        'latin1' => 'iso-8859-1',
        'latin2' => 'iso-8859-2',
        'latin3' => 'iso-8859-3',
        'latin4' => 'iso-8859-4',
        'latin5' => 'iso-8859-9',
        'latin6' => 'iso-8859-10',
        'latin7' => 'iso-8859-13',
        'latin8' => 'iso-8859-14',
        'latin9' => 'iso-8859-15',
        'latin10' => 'iso-8859-16',
        'iso8859-1' => 'iso-8859-1',
        'iso8859-2' => 'iso-8859-2',
        'iso8859-3' => 'iso-8859-3',
        'iso8859-4' => 'iso-8859-4',
        'iso8859-5' => 'iso-8859-5',
        'iso8859-6' => 'iso-8859-6',
        'iso8859-7' => 'iso-8859-7',
        'iso8859-8' => 'iso-8859-8',
        'iso8859-9' => 'iso-8859-9',
        'iso8859-10' => 'iso-8859-10',
        'iso8859-11' => 'iso-8859-11',
        'iso8859-12' => 'iso-8859-12',
        'iso8859-13' => 'iso-8859-13',
        'iso8859-14' => 'iso-8859-14',
        'iso8859-15' => 'iso-8859-15',
        'iso8859-16' => 'iso-8859-16',
        'iso_8859-1' => 'iso-8859-1',
        'iso_8859-2' => 'iso-8859-2',
        'iso_8859-3' => 'iso-8859-3',
        'iso_8859-4' => 'iso-8859-4',
        'iso_8859-5' => 'iso-8859-5',
        'iso_8859-6' => 'iso-8859-6',
        'iso_8859-7' => 'iso-8859-7',
        'iso_8859-8' => 'iso-8859-8',
        'iso_8859-9' => 'iso-8859-9',
        'iso_8859-10' => 'iso-8859-10',
        'iso_8859-11' => 'iso-8859-11',
        'iso_8859-12' => 'iso-8859-12',
        'iso_8859-13' => 'iso-8859-13',
        'iso_8859-14' => 'iso-8859-14',
        'iso_8859-15' => 'iso-8859-15',
        'iso_8859-16' => 'iso-8859-16',
        'iso88591' => 'iso-8859-1',
        'iso88592' => 'iso-8859-2',
        'iso88593' => 'iso-8859-3',
        'iso88594' => 'iso-8859-4',
        'iso88595' => 'iso-8859-5',
        'iso88596' => 'iso-8859-6',
        'iso88597' => 'iso-8859-7',
        'iso88598' => 'iso-8859-8',
        'iso88599' => 'iso-8859-9',
        'iso885910' => 'iso-8859-10',
        'iso885911' => 'iso-8859-11',
        'iso885912' => 'iso-8859-12',
        'iso885913' => 'iso-8859-13',
        'iso885914' => 'iso-8859-14',
        'iso885915' => 'iso-8859-15',
        'iso885916' => 'iso-8859-16',
    ];

    /** Transliteration table, lazily loaded by mapFromUtf8() when //TRANSLIT is requested. */
    private static $translitMap = [];

    /** Cache of loaded charset maps, keyed by "<type><charset>" (e.g. "from.iso-8859-1"). */
    private static $convertMap = [];
    private static $errorHandler;
    private static $lastError;

    /** Byte length of a UTF-8 sequence, keyed by the lead byte masked with 0xF0. */
    private static $ulenMask = ["\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4];

    /** Result of the last UTF-8 validity check done by iconv(); read by the utf8/map helpers. */
    private static $isValidUtf8;

    /**
     * Converts a string from one charset to another, pivoting through UTF-8.
     *
     * Charset names are lower-cased and resolved through self::$alias. Trailing
     * "//TRANSLIT" and "//IGNORE" suffixes are stripped from both names, but only
     * those found on $outCharset enable the corresponding behavior. An empty
     * charset name is treated as "iso-8859-1".
     *
     * @param string $inCharset  charset of $str
     * @param string $outCharset target charset, optionally suffixed with //TRANSLIT and/or //IGNORE
     * @param string $str        the string to convert (cast to string)
     *
     * @return string|false the converted string, or false on failure
     */
    public static function iconv($inCharset, $outCharset, $str)
    {
        $str = (string) $str;
        if ('' === $str) {
            return '';
        }

        // Prepare for //IGNORE and //TRANSLIT

        $translit = $ignore = '';

        $outCharset = strtolower($outCharset);
        $inCharset = strtolower($inCharset);

        if ('' === $outCharset) {
            $outCharset = 'iso-8859-1';
        }
        if ('' === $inCharset) {
            $inCharset = 'iso-8859-1';
        }

        // Suffixes may be stacked in any order, hence the loop
        do {
            $loop = false;

            if ('//translit' === substr($outCharset, -10)) {
                $loop = $translit = true;
                $outCharset = substr($outCharset, 0, -10);
            }

            if ('//ignore' === substr($outCharset, -8)) {
                $loop = $ignore = true;
                $outCharset = substr($outCharset, 0, -8);
            }
        } while ($loop);

        // Suffixes on the input charset are removed but otherwise have no effect
        do {
            $loop = false;

            if ('//translit' === substr($inCharset, -10)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -10);
            }

            if ('//ignore' === substr($inCharset, -8)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -8);
            }
        } while ($loop);

        if (isset(self::$alias[$inCharset])) {
            $inCharset = self::$alias[$inCharset];
        }
        if (isset(self::$alias[$outCharset])) {
            $outCharset = self::$alias[$outCharset];
        }

        // Load charset maps

        if (('utf-8' !== $inCharset && !self::loadMap('from.', $inCharset, $inMap))
          || ('utf-8' !== $outCharset && !self::loadMap('to.', $outCharset, $outMap))) {
            trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

            return false;
        }

        if ('utf-8' !== $inCharset) {
            // Convert input to UTF-8
            $result = '';
            if (self::mapToUtf8($result, $inMap, $str, $ignore)) {
                $str = $result;
            } else {
                $str = false;
            }
            self::$isValidUtf8 = true;
        } else {
            self::$isValidUtf8 = preg_match('//u', $str);

            if (!self::$isValidUtf8 && !$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            if ('utf-8' === $outCharset) {
                // UTF-8 validation
                $str = self::utf8ToUtf8($str, $ignore);
            }
        }

        if ('utf-8' !== $outCharset && false !== $str) {
            // Convert output from UTF-8
            $result = '';
            if (self::mapFromUtf8($result, $outMap, $str, $ignore, $translit)) {
                return $result;
            }

            return false;
        }

        return $str;
    }

    /**
     * Decodes several MIME header fields at once.
     *
     * Only the part of $str before the first blank line is parsed. Each unfolded
     * header line is decoded with iconv_mime_decode() and split on its first ":".
     * Lines without a ":" are skipped; a header name seen more than once yields
     * an array of values.
     *
     * @param string      $str     raw header block
     * @param int         $mode    bitmask passed through to iconv_mime_decode()
     * @param string|null $charset target charset; defaults to self::$internalEncoding
     *
     * @return array|false map of header name to value (or list of values), false if any line fails to decode
     */
    public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }

        // Normalize CRLF and lone CR line endings to LF
        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $parts = explode("\n\n", $str, 2);

        $headers = [];

        // A newline followed by a space or tab is a folded continuation, not a new header
        foreach (preg_split('/\n(?![ \t])/', $parts[0]) as $line) {
            $line = self::iconv_mime_decode($line, $mode, $charset);
            if (false === $line) {
                return false;
            }
            $pair = explode(':', $line, 2);

            if (2 === \count($pair)) {
                if (isset($headers[$pair[0]])) {
                    if (!\is_array($headers[$pair[0]])) {
                        $headers[$pair[0]] = [$headers[$pair[0]]];
                    }
                    $headers[$pair[0]][] = ltrim($pair[1]);
                } else {
                    $headers[$pair[0]] = ltrim($pair[1]);
                }
            }
        }

        return $headers;
    }

    /**
     * Decodes a single MIME header field.
     *
     * Only the first (unfolded) header line of $str is processed. Encoded words
     * of the form "=?charset?B|Q?data?=" are decoded and converted to $charset;
     * the text between them is converted from UTF-8 to $charset.
     *
     * @param string      $str     the header field
     * @param int         $mode    bitmask; with ICONV_MIME_DECODE_CONTINUE_ON_ERROR set, "//IGNORE" is
     *                             appended to $charset and undecodable encoded words are kept verbatim
     * @param string|null $charset target charset; defaults to self::$internalEncoding
     *
     * @return string|false the decoded field, or false on failure
     */
    public static function iconv_mime_decode($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }
        if (\ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
            $charset .= '//IGNORE';
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        // Keep the first header line only, then unfold it
        $str = preg_split('/\n(?![ \t])/', rtrim($str), 2);
        $str = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($str[0]));
        // Yields [text, charset, scheme, data, text, charset, scheme, data, text, ...]
        $str = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $str, -1, \PREG_SPLIT_DELIM_CAPTURE);

        $result = self::iconv('utf-8', $charset, $str[0]);
        if (false === $result) {
            return false;
        }

        $i = 1;
        $len = \count($str);

        while ($i < $len) {
            $c = strtolower($str[$i]);
            if ((\ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode)
              && 'utf-8' !== $c
              && !isset(self::$alias[$c])
              && !self::loadMap('from.', $c, $d)) {
                // Unknown charset in lenient mode: fall through to the verbatim branch below
                $d = false;
            } elseif ('B' === strtoupper($str[$i + 1])) {
                $d = base64_decode($str[$i + 2]);
            } else {
                // Q scheme: "_" stands for a space and "=XX" for a byte; literal "%" is protected first
                $d = rawurldecode(strtr(str_replace('%', '%25', $str[$i + 2]), '=_', '% '));
            }

            if (false !== $d) {
                if ('' !== $d) {
                    if ('' === $d = self::iconv($c, $charset, $d)) {
                        $str[$i + 3] = substr($str[$i + 3], 1);
                    } else {
                        $result .= $d;
                    }
                }
                $d = self::iconv('utf-8', $charset, $str[$i + 3]);
                // Whitespace-only text between two encoded words is dropped
                if ('' !== trim($d)) {
                    $result .= $d;
                }
            } elseif (\ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
                $result .= "=?{$str[$i]}?{$str[$i + 1]}?{$str[$i + 2]}?={$str[$i + 3]}";
            } else {
                $result = false;
                break;
            }

            $i += 4;
        }

        return $result;
    }

    /**
     * Returns the configured encodings.
     *
     * @param string $type 'input_encoding', 'output_encoding' or 'internal_encoding'; any other value returns all three
     *
     * @return string|array the requested encoding, or an array of all three keyed by type
     */
    public static function iconv_get_encoding($type = 'all')
    {
        switch ($type) {
            case 'input_encoding': return self::$inputEncoding;
            case 'output_encoding': return self::$outputEncoding;
            case 'internal_encoding': return self::$internalEncoding;
        }

        return [
            'input_encoding' => self::$inputEncoding,
            'output_encoding' => self::$outputEncoding,
            'internal_encoding' => self::$internalEncoding,
        ];
    }

    /**
     * Sets one of the configured encodings.
     *
     * @param string $type    'input_encoding', 'output_encoding' or 'internal_encoding'
     * @param string $charset the charset to store (stored as given, not validated)
     *
     * @return bool false when $type is not one of the three known settings
     */
    public static function iconv_set_encoding($type, $charset)
    {
        switch ($type) {
            case 'input_encoding': self::$inputEncoding = $charset; break;
            case 'output_encoding': self::$outputEncoding = $charset; break;
            case 'internal_encoding': self::$internalEncoding = $charset; break;
            default: return false;
        }

        return true;
    }

    /**
     * Composes a MIME header field as a list of encoded words.
     *
     * @param string     $fieldName  header name; replaced by '' if it contains bytes >= 0x80
     * @param string     $fieldValue header value, in the 'input-charset' preference
     * @param array|null $pref       optional overrides for 'scheme' ('B' or 'Q'), 'input-charset',
     *                               'output-charset', 'line-length' and 'line-break-chars'
     *
     * @return string|false the encoded header, or false if a charset conversion fails
     */
    public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
    {
        if (!\is_array($pref)) {
            $pref = [];
        }

        $pref += [
            'scheme' => 'B',
            'input-charset' => self::$internalEncoding,
            'output-charset' => self::$internalEncoding,
            'line-length' => 76,
            'line-break-chars' => "\r\n",
        ];

        if (preg_match('/[\x80-\xFF]/', $fieldName)) {
            $fieldName = '';
        }

        $scheme = strtoupper(substr($pref['scheme'], 0, 1));
        $in = strtolower($pref['input-charset']);
        $out = strtolower($pref['output-charset']);

        if ('utf-8' !== $in && false === $fieldValue = self::iconv($in, 'utf-8', $fieldValue)) {
            return false;
        }

        // Work character by character so a multi-byte character is never split across encoded words
        preg_match_all('/./us', $fieldValue, $chars);

        $chars = $chars[0] ?? [];

        $lineBreak = (int) $pref['line-length'];
        $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
        // The first line also carries "<name>: "; continuation lines only carry the leading space
        $lineLength = \strlen($fieldName) + 2 + \strlen($lineStart) + 2;
        $lineOffset = \strlen($lineStart) + 3;
        $lineData = '';

        $fieldValue = [];

        $Q = 'Q' === $scheme;

        foreach ($chars as $c) {
            if ('utf-8' !== $out && false === $c = self::iconv('utf-8', $out, $c)) {
                return false;
            }

            // $o is what the current line would grow by (Q) or become (B) if $c were appended
            $o = $Q
                ? $c = preg_replace_callback(
                    '/[=_\?\x00-\x1F\x80-\xFF]/',
                    [__CLASS__, 'qpByteCallback'],
                    $c
                )
                : base64_encode($lineData.$c);

            if (isset($o[$lineBreak - $lineLength])) {
                // Line is full: flush it and start a new encoded word
                if (!$Q) {
                    $lineData = base64_encode($lineData);
                }
                $fieldValue[] = $lineStart.$lineData.'?=';
                $lineLength = $lineOffset;
                $lineData = '';
            }

            $lineData .= $c;
            $Q && $lineLength += \strlen($c);
        }

        if ('' !== $lineData) {
            if (!$Q) {
                $lineData = base64_encode($lineData);
            }
            $fieldValue[] = $lineStart.$lineData.'?=';
        }

        return $fieldName.': '.implode($pref['line-break-chars'].' ', $fieldValue);
    }

    /**
     * Returns the number of characters in a string.
     *
     * Delegates to strlen1() when utf8_decode() can be used without a
     * deprecation notice, and to strlen2() otherwise.
     *
     * @param string      $s        the string to measure
     * @param string|null $encoding charset of $s; defaults to self::$internalEncoding
     *
     * @return int|false the character count, or false if $s cannot be converted to UTF-8
     */
    public static function iconv_strlen($s, $encoding = null)
    {
        static $useUtf8Decode = null;
        if (null === $useUtf8Decode) {
            // utf8_decode() is deprecated as of PHP 8.2; strlen2() needs no extension function
            $useUtf8Decode = \PHP_VERSION_ID < 80200 && \function_exists('utf8_decode');
        }

        if ($useUtf8Decode) {
            return self::strlen1($s, $encoding);
        }

        return self::strlen2($s, $encoding);
    }

    /**
     * Character count based on utf8_decode(), which emits one byte per decoded character.
     *
     * @param string      $s        the string to measure
     * @param string|null $encoding charset of $s; defaults to self::$internalEncoding
     *
     * @return int|false the character count, or false if $s cannot be converted to UTF-8
     */
    public static function strlen1($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        return \strlen(utf8_decode($s));
    }

    /**
     * Character count based on UTF-8 lead bytes; uses no extension function.
     *
     * @param string      $s        the string to measure
     * @param string|null $encoding charset of $s; defaults to self::$internalEncoding
     *
     * @return int|false the character count, or false if $s cannot be converted to UTF-8
     */
    public static function strlen2($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $ulenMask = self::$ulenMask;

        $i = 0;
        $j = 0;
        $len = \strlen($s);

        while ($i < $len) {
            // The high nibble of the lead byte gives the sequence length; anything else counts as 1 byte
            $u = $s[$i] & "\xF0";
            $i += $ulenMask[$u] ?? 1;
            ++$j;
        }

        return $j;
    }

    /**
     * Finds the character position of the first occurrence of a needle in a haystack.
     *
     * @param string      $haystack the string to search in
     * @param string      $needle   the string to search for
     * @param int         $offset   number of leading characters of $haystack to skip
     * @param string|null $encoding charset of both strings; defaults to self::$internalEncoding
     *
     * @return int|false the position counted in characters, or false if not found or on conversion failure
     */
    public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        if ($offset = (int) $offset) {
            $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
        }
        // Byte position first, then translated to a character position
        $pos = strpos($haystack, $needle);

        return false === $pos ? false : ($offset + ($pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0));
    }

    /**
     * Finds the character position of the last occurrence of a needle in a haystack.
     *
     * @param string      $haystack the string to search in
     * @param string      $needle   the string to search for; an empty needle is never found
     * @param string|null $encoding charset of both strings; defaults to self::$internalEncoding
     *
     * @return int|false the position counted in characters, or false if not found or on conversion failure
     */
    public static function iconv_strrpos($haystack, $needle, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

        // The character position is the length of the prefix before the match.
        // A match at byte 0 has an empty prefix and must yield 0, not the haystack length.
        return false === $pos ? false : self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8');
    }

    /**
     * Cuts out part of a string, counting in characters.
     *
     * @param string      $s        the source string
     * @param int         $start    start position; negative values count from the end
     * @param int         $length   maximum number of characters; negative values stop that many characters before the end
     * @param string|null $encoding charset of $s and of the result; defaults to self::$internalEncoding
     *
     * @return string|false the extracted part; for out-of-range arguments '' on PHP >= 8.0 and false before,
     *                      and false when a charset conversion fails
     */
    public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        // Always pivot through UTF-8: this converts strings in other charsets so that the
        // UTF-8 aware logic below applies to them, and validates strings that already are UTF-8.
        if (false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $s = (string) $s;
        $slen = self::iconv_strlen($s, 'utf-8');
        $start = (int) $start;

        if (0 > $start) {
            $start += $slen;
        }
        if (0 > $start) {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            $start = 0;
        }
        if ($start >= $slen) {
            return \PHP_VERSION_ID >= 80000 ? '' : false;
        }

        // $rx first holds the number of characters remaining after $start
        $rx = $slen - $start;

        if (0 > $length) {
            $length += $rx;
        }
        if (0 === $length) {
            return '';
        }
        if (0 > $length) {
            return \PHP_VERSION_ID >= 80000 ? '' : false;
        }

        if ($length > $rx) {
            $length = $rx;
        }

        // Skip $start characters, then capture $length characters
        $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/u';

        $s = preg_match($rx, $s, $s) ? $s[1] : '';

        return self::iconv('utf-8', $encoding, $s);
    }

    /**
     * Loads a charset map into $map, using self::$convertMap as a cache.
     *
     * When no dedicated "to." map exists, the flipped "from." map is used instead.
     *
     * @param string $type    'from.' or 'to.'
     * @param string $charset canonical charset name
     * @param array  $map     receives the map on success
     *
     * @return bool false when no map is available for the charset
     */
    private static function loadMap($type, $charset, &$map)
    {
        if (!isset(self::$convertMap[$type.$charset])) {
            if (false === $map = self::getData($type.$charset)) {
                if ('to.' === $type && self::loadMap('from.', $charset, $map)) {
                    $map = array_flip($map);
                } else {
                    return false;
                }
            }

            self::$convertMap[$type.$charset] = $map;
        } else {
            $map = self::$convertMap[$type.$charset];
        }

        return true;
    }

    /**
     * Copies a UTF-8 string, dropping (with $ignore) or rejecting invalid sequences.
     *
     * @param string $str    the input, already checked by iconv() (see self::$isValidUtf8)
     * @param mixed  $ignore truthy to skip invalid bytes instead of failing
     *
     * @return string|false the resulting string, or false after triggering an error
     */
    private static function utf8ToUtf8($str, $ignore)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        // $u is an output buffer of the same size as the input, truncated to $j bytes at the end
        $u = $str;
        $i = $j = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $u[$j++] = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = $ulenMask[$ulen] ?? 1;
                $uchr = substr($str, $i, $ulen);

                if (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr))) {
                    if ($ignore) {
                        ++$i;
                        continue;
                    }

                    trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                    return false;
                }

                $i += $ulen;

                $u[$j++] = $uchr[0];

                // Copy the remaining bytes of the sequence, stopping at its end
                isset($uchr[1]) && 0 !== ($u[$j++] = $uchr[1])
                    && isset($uchr[2]) && 0 !== ($u[$j++] = $uchr[2])
                    && isset($uchr[3]) && 0 !== ($u[$j++] = $uchr[3]);
            }
        }

        return substr($u, 0, $j);
    }

    /**
     * Converts a string to UTF-8 using a "from." charset map, appending to $result.
     *
     * @param string $result receives the converted bytes
     * @param array  $map    source byte(s) => UTF-8 character
     * @param string $str    the input string
     * @param mixed  $ignore truthy to skip unmapped bytes instead of failing
     *
     * @return bool false after triggering an error on an unmapped byte (unless $ignore)
     */
    private static function mapToUtf8(&$result, array $map, $str, $ignore)
    {
        $len = \strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            // Two-byte entries take precedence over single-byte ones
            if (isset($str[$i + 1], $map[$str[$i].$str[$i + 1]])) {
                $result .= $map[$str[$i].$str[++$i]];
            } elseif (isset($map[$str[$i]])) {
                $result .= $map[$str[$i]];
            } elseif (!$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }
        }

        return true;
    }

    /**
     * Converts a UTF-8 string using a "to." charset map, appending to $result.
     *
     * @param string $result   receives the converted bytes
     * @param array  $map      UTF-8 character => target byte(s)
     * @param string $str      the UTF-8 input
     * @param mixed  $ignore   truthy to skip characters that cannot be converted
     * @param mixed  $translit truthy to try a transliteration before giving up on a character
     *
     * @return bool false when a character cannot be converted (unless $ignore)
     */
    private static function mapFromUtf8(&$result, array $map, $str, $ignore, $translit)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        if ($translit && !self::$translitMap) {
            self::$translitMap = self::getData('translit');
        }

        $i = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $uchr = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = $ulenMask[$ulen] ?? 1;
                $uchr = substr($str, $i, $ulen);

                if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                    ++$i;
                    continue;
                }

                $i += $ulen;
            }

            if (isset($map[$uchr])) {
                $result .= $map[$uchr];
            } elseif ($translit) {
                if (isset(self::$translitMap[$uchr])) {
                    $uchr = self::$translitMap[$uchr];
                } elseif ($uchr >= "\xC3\x80") {
                    // Decompose and keep the first byte only when it is ASCII
                    $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

                    if ($uchr[0] < "\x80") {
                        $uchr = $uchr[0];
                    } elseif ($ignore) {
                        continue;
                    } else {
                        return false;
                    }
                } elseif ($ignore) {
                    continue;
                } else {
                    return false;
                }

                // Re-run the loop on the replacement followed by the unprocessed remainder
                $str = $uchr.substr($str, $i);
                $len = \strlen($str);
                $i = 0;
            } elseif (!$ignore) {
                return false;
            }
        }

        return true;
    }

    /**
     * preg_replace_callback() handler that Q-encodes one byte as "=XX".
     *
     * @param array $m regex match; $m[0] is the single byte to encode
     *
     * @return string "=" followed by exactly two upper-case hex digits
     */
    private static function qpByteCallback(array $m)
    {
        // Zero-pad so that bytes below 0x10 yield e.g. "=09" rather than the invalid "=9"
        return sprintf('=%02X', \ord($m[0]));
    }

    /**
     * Builds a regex fragment matching exactly $offset characters.
     *
     * The count is split into chunks of at most 65535, the largest repeat count
     * this method puts into a single quantifier.
     *
     * @param int $offset number of characters to match
     *
     * @return string the regex fragment
     */
    private static function pregOffset($offset)
    {
        $rx = [];
        $offset = (int) $offset;

        while ($offset > 65535) {
            $rx[] = '.{65535}';
            $offset -= 65535;
        }

        return implode('', $rx).'.{'.$offset.'}';
    }

    /**
     * Loads a data file from the Resources/charset directory.
     *
     * @param string $file base name of the file, without the ".php" extension
     *
     * @return mixed the value returned by the file, or false when it does not exist
     */
    private static function getData($file)
    {
        // Charset names can originate from untrusted MIME headers and end up in a require
        // path: reject anything that could escape the charset directory.
        if (false !== strpbrk($file, "/\\\0")) {
            return false;
        }

        if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
            return require $file;
        }

        return false;
    }
}