> 6));
        $char .= chr(0x80 | ($codePoint & 0x003f));
    } elseif ($codePoint == 0xFEFF) {
        // nop -- zap the BOM
    } elseif ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
        // found a surrogate
        return false;
    } elseif($codePoint <= 0xffff) {
        $char .= chr(0xe0 | ($codePoint >> 12));
        $char .= chr(0x80 | (($codePoint >> 6) & 0x003f));
        $char .= chr(0x80 | ($codePoint & 0x003f));
    } elseif($codePoint <= 0x10ffff) {
        $char .= chr(0xf0 | ($codePoint >> 18));
        $char .= chr(0x80 | (($codePoint >> 12) & 0x3f));
        $char .= chr(0x80 | (($codePoint >> 6) & 0x3f));
        $char .= chr(0x80 | ($codePoint & 0x3f));
    } else {
        // out of range
        return false;
    }
    return $char;
}

/**
 * Translate a single Windows-1252 byte from the 0x80-0x9F range to UTF-8.
 *
 * Used by utf8FromCP1252() as the per-match worker.
 *
 * @param string $char Byte to translate; only its first byte is examined
 *                     (via ord()).
 * @return mixed Whatever utf8Char() returns for the mapped code point, or ''
 *               when the byte has no entry in the table (the unassigned
 *               slots 129, 141, 143, 144, 157 and anything outside 128-159).
 */
function utf8FromCP1252Char($char)
{
    // Windows-1252 byte value => Unicode code point. Unassigned bytes are
    // simply absent. Static so the table is built once, not on every call.
    static $utf8CodePoint = array(
        128 => 0x20AC,
        130 => 0x201A, 131 => 0x0192, 132 => 0x201E, 133 => 0x2026,
        134 => 0x2020, 135 => 0x2021, 136 => 0x02C6, 137 => 0x2030,
        138 => 0x0160, 139 => 0x2039, 140 => 0x0152,
        142 => 0x017D,
        145 => 0x2018, 146 => 0x2019, 147 => 0x201C, 148 => 0x201D,
        149 => 0x2022, 150 => 0x2013, 151 => 0x2014, 152 => 0x02DC,
        153 => 0x2122, 154 => 0x0161, 155 => 0x203A, 156 => 0x0153,
        158 => 0x017E, 159 => 0x0178
    );

    $cp1252CodePoint = ord($char);

    // Never hand utf8Char() an empty string or an undefined index: it
    // expects an integer code point.
    if (!isset($utf8CodePoint[$cp1252CodePoint])) {
        return '';
    }

    return utf8Char($utf8CodePoint[$cp1252CodePoint]);
}

/**
 * Convert the encoding of a string from Windows-1252 to UTF-8.
 *
 * The string is first converted as if it were ISO-8859-1, which turns every
 * byte 0x80-0x9F into the two-byte sequence "\xC2" + that byte. Those
 * sequences are then rewritten to the characters Windows-1252 actually
 * assigns to those bytes.
 *
 * @param string $string Input to convert.
 * @return mixed The converted string, or '' when isCP1252($string) is falsy.
 *               (preg_replace_callback() itself yields null on a PCRE error.)
 */
function utf8FromCP1252($string)
{
    if (!isCP1252($string)) {
        return '';
    }

    // utf8_encode() is deprecated as of PHP 8.2; mb_convert_encoding() from
    // ISO-8859-1 performs the same byte-to-code-point mapping, so prefer it
    // whenever the mbstring extension is loaded.
    if (function_exists('mb_convert_encoding')) {
        $utf8String = mb_convert_encoding($string, 'UTF-8', 'ISO-8859-1');
    } else {
        $utf8String = utf8_encode($string);
    }

    // create_function() was removed in PHP 8.0, so use a closure. The
    // character class lists exactly the bytes that have a table entry in
    // utf8FromCP1252Char().
    return preg_replace_callback(
        '|\xC2([\x80\x82-\x8C\x8E\x91-\x9C\x9E\x9F])|',
        function ($match) {
            return utf8FromCP1252Char($match[1]);
        },
        $utf8String
    );
}
?>