/**
 * Lookup table keyed by a single byte; the value is the total length in bytes
 * of the UTF-8 sequence that the byte starts.
 *
 *  - 0x00-0x7F => 1 (single-byte characters)
 *  - 0x80-0xBF => 0 (the same range the scanner accepts as follow-up bytes,
 *                    so these cannot start a sequence)
 *  - 0xC0-0xDF => 2, 0xE0-0xEF => 3, 0xF0-0xF7 => 4
 *  - 0xF8-0xFB => 5, 0xFC-0xFD => 6
 *  - 0xFE-0xFF => 0
 *
 * getUtf8CharPositions() indexes this table with the current byte to decide
 * how many bytes to consume.
 *
 * NOTE(review): RFC 3629 limits UTF-8 to four bytes and never uses 0xC0/0xC1
 * as lead bytes, yet this table assigns them lengths 2, 5 and 6 - confirm
 * that accepting these legacy forms is intentional.
 */
23 private const UTF8_LENGTH_MAP = [
24 "\x00" => 1,
"\x01" => 1,
"\x02" => 1,
"\x03" => 1,
"\x04" => 1,
"\x05" => 1,
"\x06" => 1,
"\x07" => 1,
25 "\x08" => 1,
"\x09" => 1,
"\x0a" => 1,
"\x0b" => 1,
"\x0c" => 1,
"\x0d" => 1,
"\x0e" => 1,
"\x0f" => 1,
26 "\x10" => 1,
"\x11" => 1,
"\x12" => 1,
"\x13" => 1,
"\x14" => 1,
"\x15" => 1,
"\x16" => 1,
"\x17" => 1,
27 "\x18" => 1,
"\x19" => 1,
"\x1a" => 1,
"\x1b" => 1,
"\x1c" => 1,
"\x1d" => 1,
"\x1e" => 1,
"\x1f" => 1,
28 "\x20" => 1,
"\x21" => 1,
"\x22" => 1,
"\x23" => 1,
"\x24" => 1,
"\x25" => 1,
"\x26" => 1,
"\x27" => 1,
29 "\x28" => 1,
"\x29" => 1,
"\x2a" => 1,
"\x2b" => 1,
"\x2c" => 1,
"\x2d" => 1,
"\x2e" => 1,
"\x2f" => 1,
30 "\x30" => 1,
"\x31" => 1,
"\x32" => 1,
"\x33" => 1,
"\x34" => 1,
"\x35" => 1,
"\x36" => 1,
"\x37" => 1,
31 "\x38" => 1,
"\x39" => 1,
"\x3a" => 1,
"\x3b" => 1,
"\x3c" => 1,
"\x3d" => 1,
"\x3e" => 1,
"\x3f" => 1,
32 "\x40" => 1,
"\x41" => 1,
"\x42" => 1,
"\x43" => 1,
"\x44" => 1,
"\x45" => 1,
"\x46" => 1,
"\x47" => 1,
33 "\x48" => 1,
"\x49" => 1,
"\x4a" => 1,
"\x4b" => 1,
"\x4c" => 1,
"\x4d" => 1,
"\x4e" => 1,
"\x4f" => 1,
34 "\x50" => 1,
"\x51" => 1,
"\x52" => 1,
"\x53" => 1,
"\x54" => 1,
"\x55" => 1,
"\x56" => 1,
"\x57" => 1,
35 "\x58" => 1,
"\x59" => 1,
"\x5a" => 1,
"\x5b" => 1,
"\x5c" => 1,
"\x5d" => 1,
"\x5e" => 1,
"\x5f" => 1,
36 "\x60" => 1,
"\x61" => 1,
"\x62" => 1,
"\x63" => 1,
"\x64" => 1,
"\x65" => 1,
"\x66" => 1,
"\x67" => 1,
37 "\x68" => 1,
"\x69" => 1,
"\x6a" => 1,
"\x6b" => 1,
"\x6c" => 1,
"\x6d" => 1,
"\x6e" => 1,
"\x6f" => 1,
38 "\x70" => 1,
"\x71" => 1,
"\x72" => 1,
"\x73" => 1,
"\x74" => 1,
"\x75" => 1,
"\x76" => 1,
"\x77" => 1,
39 "\x78" => 1,
"\x79" => 1,
"\x7a" => 1,
"\x7b" => 1,
"\x7c" => 1,
"\x7d" => 1,
"\x7e" => 1,
"\x7f" => 1,
40 "\x80" => 0,
"\x81" => 0,
"\x82" => 0,
"\x83" => 0,
"\x84" => 0,
"\x85" => 0,
"\x86" => 0,
"\x87" => 0,
41 "\x88" => 0,
"\x89" => 0,
"\x8a" => 0,
"\x8b" => 0,
"\x8c" => 0,
"\x8d" => 0,
"\x8e" => 0,
"\x8f" => 0,
42 "\x90" => 0,
"\x91" => 0,
"\x92" => 0,
"\x93" => 0,
"\x94" => 0,
"\x95" => 0,
"\x96" => 0,
"\x97" => 0,
43 "\x98" => 0,
"\x99" => 0,
"\x9a" => 0,
"\x9b" => 0,
"\x9c" => 0,
"\x9d" => 0,
"\x9e" => 0,
"\x9f" => 0,
44 "\xa0" => 0,
"\xa1" => 0,
"\xa2" => 0,
"\xa3" => 0,
"\xa4" => 0,
"\xa5" => 0,
"\xa6" => 0,
"\xa7" => 0,
45 "\xa8" => 0,
"\xa9" => 0,
"\xaa" => 0,
"\xab" => 0,
"\xac" => 0,
"\xad" => 0,
"\xae" => 0,
"\xaf" => 0,
46 "\xb0" => 0,
"\xb1" => 0,
"\xb2" => 0,
"\xb3" => 0,
"\xb4" => 0,
"\xb5" => 0,
"\xb6" => 0,
"\xb7" => 0,
47 "\xb8" => 0,
"\xb9" => 0,
"\xba" => 0,
"\xbb" => 0,
"\xbc" => 0,
"\xbd" => 0,
"\xbe" => 0,
"\xbf" => 0,
48 "\xc0" => 2,
"\xc1" => 2,
"\xc2" => 2,
"\xc3" => 2,
"\xc4" => 2,
"\xc5" => 2,
"\xc6" => 2,
"\xc7" => 2,
49 "\xc8" => 2,
"\xc9" => 2,
"\xca" => 2,
"\xcb" => 2,
"\xcc" => 2,
"\xcd" => 2,
"\xce" => 2,
"\xcf" => 2,
50 "\xd0" => 2,
"\xd1" => 2,
"\xd2" => 2,
"\xd3" => 2,
"\xd4" => 2,
"\xd5" => 2,
"\xd6" => 2,
"\xd7" => 2,
51 "\xd8" => 2,
"\xd9" => 2,
"\xda" => 2,
"\xdb" => 2,
"\xdc" => 2,
"\xdd" => 2,
"\xde" => 2,
"\xdf" => 2,
52 "\xe0" => 3,
"\xe1" => 3,
"\xe2" => 3,
"\xe3" => 3,
"\xe4" => 3,
"\xe5" => 3,
"\xe6" => 3,
"\xe7" => 3,
53 "\xe8" => 3,
"\xe9" => 3,
"\xea" => 3,
"\xeb" => 3,
"\xec" => 3,
"\xed" => 3,
"\xee" => 3,
"\xef" => 3,
54 "\xf0" => 4,
"\xf1" => 4,
"\xf2" => 4,
"\xf3" => 4,
"\xf4" => 4,
"\xf5" => 4,
"\xf6" => 4,
"\xf7" => 4,
55 "\xf8" => 5,
"\xf9" => 5,
"\xfa" => 5,
"\xfb" => 5,
"\xfc" => 6,
"\xfd" => 6,
"\xfe" => 0,
"\xff" => 0,
// Length in bytes of the buffered data minus the trailing bytes the last
// write() left uncounted; write() recomputes it on every call.
59 private $dataSize = 0;
// Number of characters counted so far; write() adds to it.
61 private $charCount = 0;
// Read cursor, measured in characters (compared against charCount in read()).
62 private $currentPos = 0;
// Bytes per character for fixed-width charsets; 0 selects the UTF-8 code paths.
63 private $fixedWidth = 0;
/**
 * Sets up the stream for a charset and loads the initial input.
 *
 * NOTE(review): several lines of this method are not visible in this excerpt
 * (including the conditions that choose between the 2-, 4- and 1-byte widths
 * and the body of the read loop); the comments describe only what is shown.
 *
 * @param resource|string $input   Stream resources are consumed with fread();
 *                                 $input is also handed to write() directly
 *                                 (presumably only when it is not a resource
 *                                 - TODO confirm).
 * @param string|null     $charset Charset name; it is trimmed and lower-cased,
 *                                 and an empty result falls back to 'utf-8'.
 */
68 public function __construct($input, ?
string $charset =
'utf-8')
// Normalise the charset name; a falsy result (e.g. '') becomes 'utf-8'.
// NOTE(review): $charset is nullable and passing null to trim() is deprecated
// since PHP 8.1 - consider coalescing to '' first.
70 $charset = strtolower(trim($charset)) ?:
'utf-8';
// UTF-8 is variable-width: fixedWidth stays 0 and the per-character maps
// ('p' = byte offsets, 'i' = flagged characters) start out empty.
71 if (
'utf-8' === $charset ||
'utf8' === $charset) {
72 $this->fixedWidth = 0;
73 $this->map = [
'p' => [],
'i' => []];
// Other charsets get a fixed width of 2, 4 or 1 byte(s); the conditions that
// select each value are not visible in this excerpt.
81 $this->fixedWidth = 2;
89 $this->fixedWidth = 4;
96 $this->fixedWidth = 1;
// Stream input is read in blocks ($blocks is assigned on a line not shown).
99 if (\is_resource($input)) {
// Seekable streams get special handling here; the statement itself is not
// visible (presumably a rewind - TODO confirm).
101 if (stream_get_meta_data($input)[
'seekable'] ??
false) {
// NOTE(review): fread() returns '' rather than false at end of file, so this
// condition alone does not stop at EOF - confirm how the loop terminates.
104 while (
false !== $read = fread($input, $blocks)) {
// Passes $input to write() unchanged (presumably the non-resource branch).
108 $this->
write($input);
/**
 * Reads up to $length characters from the current position and moves the
 * cursor past them.
 *
 * NOTE(review): the initialisation of $ret, $start and $to, the else branches
 * and the return statements are on lines not visible in this excerpt.
 *
 * @param int $length Number of characters requested; it is reduced so the
 *                    read never extends beyond charCount.
 *
 * @return string|null The bytes of the characters read; the result for an
 *                     exhausted stream is not visible here (presumably null).
 */
112 public function read(
int $length): ?string
// The cursor has reached the end of the counted characters.
114 if ($this->currentPos >= $this->charCount) {
// Clamp the request to the characters that remain.
117 $length = ($this->currentPos + $length > $this->charCount) ? $this->charCount - $this->currentPos : $length;
// Fixed-width charsets: character positions convert to byte offsets by
// multiplying with the width.
118 if ($this->fixedWidth > 0) {
119 $len = $length * $this->fixedWidth;
120 $ret = substr($this->data, $this->currentPos * $this->fixedWidth, $len);
121 $this->currentPos += $length;
// Variable-width path: walk the position map up to $end. $length was already
// clamped above, so the second clamp does not change $end.
123 $end = $this->currentPos + $length;
124 $end = $end > $this->charCount ? $this->charCount : $end;
// map['p'][k] is the byte offset recorded for character k, so the previous
// character's entry is where this read begins.
127 if ($this->currentPos > 0) {
128 $start = $this->map[
'p'][$this->currentPos - 1];
131 for (; $this->currentPos < $end; ++$this->currentPos) {
// A character flagged in map['i'] is emitted as '?': flush the pending byte
// range, append '?', and restart from this character's recorded offset.
132 if (isset($this->map[
'i'][$this->currentPos])) {
133 $ret .= substr($this->data, $start, $to - $start).
'?';
134 $start = $this->map[
'p'][$this->currentPos];
// Extend the pending byte range to this character's recorded offset
// (presumably the else branch of the check above).
136 $to = $this->map[
'p'][$this->currentPos];
// Flush whatever byte range is still pending after the loop.
139 $ret .= substr($this->data, $start, $to - $start);
// NOTE(review): the enclosing method header is not visible in this excerpt.
// Reads $length characters through read(); when that yields a string, it is
// split into single bytes and each byte is converted to its integer value.
147 if (
null !== $read = $this->
read($length)) {
148 return array_map(
'ord', str_split($read, 1));
// NOTE(review): the enclosing method header is not visible in this excerpt.
// An offset beyond the counted characters is clamped to charCount; negative
// offsets are not adjusted by the visible code.
156 if ($this->charCount < $charOffset) {
157 $charOffset = $this->charCount;
// Move the read cursor to the (possibly clamped) character offset.
159 $this->currentPos = $charOffset;
/**
 * Appends raw bytes to the stream and counts the characters they complete.
 *
 * Bytes that end in the middle of a character stay in the buffer but are not
 * counted. They are examined again together with the next chunk, so a
 * character split across two write() calls (for example at a block boundary
 * when the constructor reads a resource) is recognised once its remaining
 * bytes arrive.
 *
 * @param string $chars Raw bytes encoded in the stream's charset.
 */
public function write(string $chars): void
{
    $ignored = '';

    // Everything stored beyond dataSize is the incomplete tail that the
    // previous call held back. It must be scanned together with the new
    // bytes: dataSize (used as the start offset below) points at that tail,
    // not at the first new byte.
    $pending = substr($this->data, $this->dataSize);
    $this->data .= $chars;
    $chunk = $pending.$chars;

    if ($this->fixedWidth > 0) {
        $strlen = \strlen($chunk);
        // Bytes that do not fill a whole character are held back again.
        $ignoredL = $strlen % $this->fixedWidth;
        $ignored = $ignoredL ? substr($chunk, -$ignoredL) : '';
        // The remainder was subtracted, so this division is exact.
        $this->charCount += ($strlen - $ignoredL) / $this->fixedWidth;
    } else {
        // Records character offsets in the map and reports, through
        // $ignored, any truncated sequence at the end of the chunk.
        $this->charCount += $this->getUtf8CharPositions($chunk, $this->dataSize, $ignored);
    }

    // dataSize covers only the bytes that were turned into characters.
    $this->dataSize = \strlen($this->data) - \strlen($ignored);
}
/**
 * Scans a chunk of bytes as UTF-8 and records per-character byte offsets in
 * $this->map.
 *
 * NOTE(review): several lines of this method (the assignment of $char and
 * $foundChars, the branch conditions, and the return) are not visible in this
 * excerpt; the comments describe only the statements shown.
 *
 * @param string $string       Bytes to scan.
 * @param int    $startOffset  Added to every recorded offset, i.e. the position
 *                             of $string within the whole buffer.
 * @param string $ignoredChars Receives, by reference, the unfinished tail of
 *                             $string when a sequence would run past its end.
 *
 * @return int Added to charCount by write() (presumably the number of
 *             characters found - the return statement is not visible).
 */
177 private function getUtf8CharPositions(
string $string,
int $startOffset,
string &$ignoredChars): int
179 $strlen = \strlen($string);
// Index of the next free character slot: the number of offsets already stored.
180 $charPos = \count($this->map[
'p']);
183 for ($i = 0; $i < $strlen; ++$i) {
// Sequence length announced by the lead byte ($char is assigned on a line
// not visible here - presumably $string[$i]).
185 $size = self::UTF8_LENGTH_MAP[$char];
// Stores an offset for this slot and flags it in map['i']; read() emits '?'
// for flagged slots. The guarding condition is not visible (presumably the
// "length 0" case - TODO confirm).
194 $this->map[
'p'][$charPos + $foundChars] = $startOffset + $i;
195 $this->map[
'i'][$charPos + $foundChars] =
true;
// The sequence would extend past the end of this chunk: hand the unfinished
// tail back to the caller.
199 if (($i + $size) > $strlen) {
200 $ignoredChars = substr($string, $i);
// Inspect the bytes that follow the lead byte.
203 for ($j = 1; $j < $size; ++$j) {
204 $char = $string[$i + $j];
// Tests whether the byte lies in 0x80-0xBF.
205 if ($char >
"\x7F" && $char <
"\xC0") {
// Record the byte offset just past this character.
214 $this->
map[
'p'][$charPos + $foundChars] = $startOffset + $i + $size;