1: <?php
2: namespace Yep\Tokenizer;
3:
/**
 * Regular-expression driven tokenizer.
 *
 * Each pattern is wrapped in a named capture group and all groups are joined
 * into a single alternation. The name of the group that captured text is used
 * as the type of the resulting token.
 */
class Tokenizer implements ITokenizer {
    /** @var string Combined regular expression built by the constructor. */
    protected $regexp;

    /** @var array Token type names (keys of the pattern map) in declaration order. */
    protected $keys;

    /** @var int Number of registered patterns. */
    protected $count = 0;

    /**
     * Builds the combined expression from a map of patterns.
     *
     * @param array  $patterns Map of token type name => PCRE pattern fragment
     *                         (no delimiters; "~" is used as the delimiter).
     *                         Each key becomes a capture group name.
     * @param string $flags    Extra PCRE modifiers appended after the always
     *                         present "A" (anchored) and "u" (UTF-8) modifiers.
     */
    public function __construct(array $patterns, $flags = '') {
        $groups = [];

        foreach ($patterns as $name => $pattern) {
            $groups[] = "?<$name>$pattern";
        }

        $this->regexp = '~(' . implode(')|(', $groups) . ')~Au' . $flags;
        $this->keys = array_keys($patterns);
        $this->count = count($patterns);
    }

    /**
     * Splits the input into a list of tokens.
     *
     * Every token is an array holding the entries self::TYPE (name of the
     * matching pattern, or null when no named group captured text),
     * self::VALUE (matched text), self::POSITION (index of the token),
     * self::OFFSET (offset of the token in characters) and self::LENGTH
     * (length of the token in characters).
     *
     * @param string $input Text to tokenize.
     *
     * @return array List of tokens in input order.
     *
     * @throws UnexpectedTokenException When the matched tokens do not cover
     *                                  the whole input.
     */
    public function tokenize($input) {
        preg_match_all($this->regexp, $input, $matches, PREG_SET_ORDER);

        // The result is collected in its own array instead of rewriting the
        // matches through a by-reference loop variable; such a reference stays
        // bound to the last element after the loop and is easy to overwrite.
        $tokens = [];
        $length = 0;

        foreach ($matches as $position => $match) {
            $type = null;

            // Groups that did not take part in the match are either missing
            // or empty strings, so the first non-empty named group is the type.
            foreach ($this->keys as $key) {
                if (isset($match[$key]) && $match[$key] !== '') {
                    $type = $key;
                    break;
                }
            }

            $tokenLength = mb_strlen($match[0]);

            $tokens[$position] = [
                self::TYPE => $type,
                self::VALUE => $match[0],
                self::POSITION => $position,
                self::OFFSET => $length,
                self::LENGTH => $tokenLength,
            ];

            $length += $tokenLength;
        }

        // The expression is anchored, so matching stops at the first character
        // no pattern accepts; a shorter total length means unconsumed input.
        if ($length !== mb_strlen($input)) {
            $consumed = mb_substr($input, 0, $length);
            $line = mb_substr_count($consumed, "\n") + 1;
            // The prepended newline guarantees a match, which makes the column
            // 1-based even on the first line.
            $col = $length - mb_strrpos("\n$consumed", "\n") + 1;
            $snippet = str_replace("\n", '\n', mb_substr($input, $length, 10));

            throw new UnexpectedTokenException(sprintf('Unexpected "%s" on line %d, column %d.', $snippet, $line, $col));
        }

        return $tokens;
    }

}
71: