Erebot  latest
A modular IRC bot for PHP 7.0+
Identity.php
1 <?php
2 /*
3  This file is part of Erebot, a modular IRC bot written in PHP.
4 
5  Copyright © 2010 François Poirotte
6 
7  Erebot is free software: you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation, either version 3 of the License, or
10  (at your option) any later version.
11 
12  Erebot is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with Erebot. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 namespace Erebot;
22 
/**
 * Represents the identity of an IRC user.
 *
 * An identity is built either from a bare nickname or from a full
 * "nick!ident@host" mask. The host part is stored in canonical form:
 * hostnames are lowercased and IP addresses are stored as compressed
 * IPv6 addresses (IPv4 addresses become IPv4-mapped IPv6 addresses).
 */
class Identity implements \Erebot\Interfaces\Identity
{
    /** Nickname for this user identity, either a string or null. */
    protected $nick;

    /** Identity string for this user identity, either a string or null. */
    protected $ident;

    /** Host part for this user identity, either a string or null. */
    protected $host;

    /**
     * Builds an identity from a nickname or a "nick!ident@host" mask.
     *
     * @param string $user
     *      Either a bare nickname (no "!" and no "@") or a full mask
     *      whose nick, ident and host parts are all non-empty.
     *
     * @throws \Erebot\InvalidValueException
     *      The value is not a string, the mask is malformed, or the host
     *      part is neither a valid hostname nor a valid IP address.
     */
    public function __construct($user)
    {
        if (!is_string($user)) {
            throw new \Erebot\InvalidValueException('Not a valid identity');
        }

        $bang = strpos($user, '!');
        if ($bang === false) {
            // If there is a "@" but no "!", this is also invalid.
            if (strpos($user, '@') !== false) {
                throw new \Erebot\InvalidValueException('Invalid mask');
            }

            $this->nick = $user;
            $this->ident = null;
            $this->host = null;
            return;
        }

        // Everything after the "!" must be exactly "ident@host".
        $parts = explode('@', substr($user, $bang + 1));
        if (count($parts) != 2) {
            throw new \Erebot\InvalidValueException('Invalid mask');
        }

        $nick = substr($user, 0, $bang);
        list($ident, $host) = $parts;

        // substr() returns '' rather than false on PHP 8, so the cast makes
        // the empty-nickname check work on every supported PHP version.
        if ((string) $nick === '' || $ident === '' || $host === '') {
            throw new \Erebot\InvalidValueException('Invalid mask');
        }

        $this->nick = $nick;
        $this->ident = $ident;
        $this->host = self::canonicalizeHost(
            $host,
            \Erebot\Interfaces\Identity::CANON_IPV6,
            false
        );
    }

    /**
     * Returns the nickname of this identity.
     *
     * @return string|null
     */
    public function getNick()
    {
        return $this->nick;
    }

    /**
     * Returns the ident part of this identity.
     *
     * @return string|null
     *      The ident, or null when the identity was built from
     *      a bare nickname.
     */
    public function getIdent()
    {
        return $this->ident;
    }

    /**
     * Strips leading zeros from a number, in place.
     *
     * A value made only of zeros (or an empty value) becomes "0".
     * The signature matches what array_walk() expects from its callback.
     *
     * @param string $number
     *      Number to strip, modified by reference.
     * @param mixed $key
     *      Key passed by array_walk(); unused.
     */
    protected static function stripLeading(&$number, $key)
    {
        $stripped = ltrim($number, '0');
        $number = ($stripped === '') ? '0' : $stripped;
    }

    /**
     * Converts a dotted IPv4 address into the two hexadecimal groups
     * used to embed it in an IPv6 address (leading zeros removed).
     *
     * @param string $dotted
     *      IPv4 address in "a.b.c.d" notation, already validated.
     *
     * @return array
     *      Two strings, e.g. array('102', '304') for "1.2.3.4".
     */
    private static function dottedToHexGroups($dotted)
    {
        $octets = explode('.', $dotted, 4);
        $groups = [
            sprintf('%02x%02x', $octets[0], $octets[1]),
            sprintf('%02x%02x', $octets[2], $octets[3]),
        ];
        array_walk($groups, [self::class, 'stripLeading']);
        return $groups;
    }

    /**
     * Returns the canonical form of a host.
     *
     * Hostnames are lowercased. IPv4 addresses are turned into IPv4-mapped
     * IPv6 addresses ("::ffff:..."). IPv6 addresses have the leading zeros
     * of each group removed, and their longest-first run of two or more
     * zero groups is replaced with "::" unless $uncompressed is set.
     *
     * @param string $host
     *      Hostname, IPv4 address or IPv6 address to canonicalize.
     * @param mixed $c10n
     *      \Erebot\Interfaces\Identity::CANON_IPV6 to write the last
     *      32 bits of an address as two hexadecimal groups, or
     *      \Erebot\Interfaces\Identity::CANON_IPV4 to write them
     *      in dotted notation.
     * @param bool $uncompressed
     *      Whether to keep all groups of the address instead of
     *      compressing zeros with "::".
     *
     * @return string
     *      The canonical, lowercased host.
     *
     * @throws \Erebot\InvalidValueException
     *      The canonicalization value is invalid or the host is neither
     *      a hostname nor an IP address.
     */
    protected static function canonicalizeHost($host, $c10n, $uncompressed)
    {
        if ($c10n != \Erebot\Interfaces\Identity::CANON_IPV4 &&
            $c10n != \Erebot\Interfaces\Identity::CANON_IPV6) {
            throw new \Erebot\InvalidValueException(
                'Invalid canonicalization value'
            );
        }
        $dottedTail = ($c10n == \Erebot\Interfaces\Identity::CANON_IPV4);

        $decOctet = '(?:\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])';
        $dotAddress = $decOctet.'(?:\\.'.$decOctet.'){3}';

        // If it's an IPv4 address, handle it here.
        // Must appear before the test for hostnames (see RFC 1123, §2.1).
        if (preg_match('/^'.$dotAddress.'$/Di', $host)) {
            $prefix = ($uncompressed ? '0:0:0:0:0' : ':');
            if ($dottedTail) {
                $octets = explode('.', $host, 4);
                array_walk($octets, [self::class, 'stripLeading']);
                return $prefix.':ffff:'.implode('.', $octets);
            }

            return $prefix.':ffff:'.
                implode(':', self::dottedToHexGroups($host));
        }

        // Adapted from the grammar & rules in RFC 1034, section 3.5,
        // with an update from the RFC 1123, section 2.1 regarding the
        // first character.
        $label = '[A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9]?';
        $hostname = '(?:'.$label.'\.)*'.$label;

        // If this is some hostname, we simply lowercase it.
        if (preg_match('/^'.$hostname.'$/Di', $host)) {
            // RFC 1123 says the top-level label in a FQDN
            // can never be all-numeric (avoids ambiguity
            // with IPv4 addresses in dotted notation).
            $last = strrchr($host, '.');
            if ($last === false ||
                strspn($last, '.1234567890') != strlen($last)) {
                return strtolower($host);
            }
        }

        $half = '[[:xdigit:]]{1,4}';
        // The only capturing group: an IPv4 address embedded at the end.
        $long = '(?:'.$half.':'.$half.'|('.$dotAddress.'))';
        $colonAddress =
            '(?:'.
            '(?:'.$half.':){6}'.$long.'|'.
            '::(?:'.$half.':){5}'.$long.'|'.
            '(?:'.$half.')?::(?:'.$half.':){4}'.$long.'|'.
            '(?:(?:'.$half.':)?'.$half.')?::(?:'.$half.':){3}'.$long.'|'.
            '(?:(?:'.$half.':){0,2}'.$half.')?::(?:'.$half.':){2}'.$long.'|'.
            '(?:(?:'.$half.':){0,3}'.$half.')?::'.$half.':'.$long.'|'.
            '(?:(?:'.$half.':){0,4}'.$half.')?::'.$long.'|'.
            '(?:(?:'.$half.':){0,5}'.$half.')?::'.$half.'|'.
            '(?:(?:'.$half.':){0,6}'.$half.')?::'.
            ')';

        // Is it an IPv6? maybe not...
        if (!preg_match('/^'.$colonAddress.'$/Di', $host, $matches)) {
            throw new \Erebot\InvalidValueException(
                'Unrecognized "host" ('.$host.')'
            );
        }

        // It's an IPv6 alright! Let's handle it.
        if (count($matches) > 1) {
            // IPv6 mapped IPv4: the last captured group holds the dotted
            // part, which gets rewritten as two hexadecimal groups.
            $dotted = end($matches);
            $host = str_replace(
                $dotted,
                implode(':', self::dottedToHexGroups($dotted)),
                $host
            );
        }

        // Handle "::".
        if (strpos($host, '::') !== false) {
            // Make sure "::" sits between two groups before expanding it.
            if (substr($host, 0, 2) == '::') {
                $host = '0'.$host;
            }
            if (substr($host, -2) == '::') {
                $host .= '0';
            }
            $repeat = 8 - substr_count($host, ':');
            $host = str_replace('::', ':'.str_repeat('0:', $repeat), $host);
        }

        // Remove superfluous leading zeros.
        $parts = explode(':', $host, 8);
        array_walk($parts, [self::class, 'stripLeading']);
        if ($dottedTail) {
            // Merge the last two groups into one 32-bit value
            // and write it in dotted notation.
            $parts[7] = (hexdec($parts[6]) << 16) + hexdec($parts[7]);
            $parts[6] = long2ip(array_pop($parts));
        }

        if ($uncompressed) {
            return strtolower(implode(':', $parts));
        }

        // Compress the zeros. The "x" sentinels let a run of zeros be
        // found the same way whether it is at the start, middle or end.
        $host = 'x:'.implode(':', $parts).':x';
        for ($i = 8; $i > 1; $i--) {
            $run = ':'.str_repeat('0:', $i);
            $pos = strpos($host, $run);
            if ($pos !== false) {
                $host = (string) substr($host, 0, $pos).'::'.
                        (string) substr($host, $pos + strlen($run));
                break;
            }
        }

        $host = str_replace(['x::', '::x'], '::', $host);
        $host = str_replace(['x:', ':x'], '', $host);
        return strtolower($host);
    }

    /**
     * Returns the host part of this identity.
     *
     * @param mixed $c10n
     *      \Erebot\Interfaces\Identity::CANON_IPV6 or
     *      \Erebot\Interfaces\Identity::CANON_IPV4
     *      (see canonicalizeHost()).
     *
     * @return string|null
     *      The canonical host, or null when this identity has no host.
     *
     * @throws \Erebot\InvalidValueException
     *      The identity has a host and the canonicalization
     *      value is invalid.
     */
    public function getHost($c10n)
    {
        if ($this->host === null) {
            return null;
        }

        // The host is already stored in its CANON_IPV6 form.
        if ($c10n == \Erebot\Interfaces\Identity::CANON_IPV6) {
            return $this->host;
        }

        return self::canonicalizeHost($this->host, $c10n, false);
    }

    /**
     * Returns this identity as a "nick!ident@host" mask.
     *
     * A missing ident or host is rendered as "*".
     *
     * @param mixed $c10n
     *      Canonicalization to use for the host (see getHost()).
     *
     * @return string
     */
    public function getMask($c10n)
    {
        $ident = ($this->ident === null) ? '*' : $this->ident;
        $host = ($this->host === null) ? '*' : $this->getHost($c10n);
        return $this->nick.'!'.$ident.'@'.$host;
    }

    /**
     * Returns the nickname of this identity.
     *
     * @return string
     */
    public function __toString()
    {
        return $this->nick;
    }

    /**
     * Tells whether this identity matches a "nick!ident@host" pattern.
     *
     * The nick and ident parts may use the "?" and "*" wildcards.
     * The host part may be a hostname with wildcards, or an IPv4/IPv6
     * address optionally followed by "/netmask". Trailing "*" components
     * are accepted in a dotted IPv4 address (or in the IPv4 part of
     * an IPv6 address) and are turned into the equivalent netmask.
     *
     * An IP address pattern never matches an identity whose host is
     * a hostname or is unknown.
     *
     * @param string $pattern
     *      Pattern to match this identity against.
     * @param \Erebot\Interfaces\IrcCollator $collator
     *      Collator used to normalize nicknames before comparing them.
     *
     * @return bool
     *      true if this identity matches the pattern, false otherwise
     *      (including when the pattern is not of the form
     *      "nick!ident@host" with a non-empty ident and host).
     *
     * @throws \Erebot\InvalidValueException
     *      The host part mixes wildcards with a netmask, has an invalid
     *      netmask, or is not a valid address once wildcards are expanded.
     */
    public function match($pattern, \Erebot\Interfaces\IrcCollator $collator)
    {
        $nickAndRest = explode('!', $pattern, 2);
        if (count($nickAndRest) != 2) {
            return false;
        }

        $identAndHost = explode('@', $nickAndRest[1], 2);
        if (count($identAndHost) != 2) {
            return false;
        }

        $nick = $nickAndRest[0];
        list($ident, $host) = $identAndHost;
        if ($ident === '' || $host === '') {
            return false;
        }

        $nick = $collator->normalizeNick($nick);
        $thisNick = $collator->normalizeNick($this->nick);
        if (!preg_match(self::patternize($nick, true), $thisNick)) {
            return false;
        }

        $thisIdent = ($this->ident === null) ? '' : $this->ident;
        if (!preg_match(self::patternize($ident, true), $thisIdent)) {
            return false;
        }

        if ($this->host === null) {
            $thisHost = '';
        } else {
            $thisHost = self::canonicalizeHost(
                $this->host,
                \Erebot\Interfaces\Identity::CANON_IPV6,
                true
            );
        }

        // Detect a raw IPv4. The patterns allows the use of "*" where
        // a number is usually expected, as well as "a.b.c.d/netmask".
        // Must appear before the test for hostnames (see RFC 1123, §2.1).
        $decOctet = '(?:\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5]|\\*)';
        $dotAddress = $decOctet.'(?:\\.'.$decOctet.'){3}(?:/[0-9]*)?';
        $isDottedAddress = (bool) preg_match('#^'.$dotAddress.'$#Di', $host);

        // It's some hostname (not an IPv4).
        if (strpos($host, ':') === false && !$isDottedAddress) {
            return (bool) preg_match(
                self::patternize($host, false),
                $thisHost
            );
        }

        // From here on, the pattern is an IP address: split it into
        // an address and a prefix length expressed in IPv6 bits.
        $spec = explode('/', $host, 2);
        $address = $spec[0];
        if (strpos($address, '*') !== false) {
            // Handle wildcards for IPv6 mapped IPv4.
            if (count($spec) == 2) {
                throw new \Erebot\InvalidValueException(
                    "Wildcard characters and netmasks ".
                    "don't go together very well"
                );
            }

            // Each trailing "*" component becomes a zero octet
            // and shortens the prefix by 8 bits.
            $zeros = '';
            $prefixLen = 128;
            for ($i = 0; $i < 4; $i++) {
                $sep = (($i == 3) ? ($isDottedAddress ? '' : ':') : '.');
                if (substr($address, -2) === $sep.'*') {
                    $prefixLen -= 8;
                    $address = substr($address, 0, -2);
                    $zeros = $sep.'0'.$zeros;
                }
            }
            $address .= $zeros;
            // We could check whether some wildcards remain or not,
            // but self::canonicalizeHost will raise an exception
            // for such a pattern anyway.
        } elseif (count($spec) == 1) {
            // No netmask given, assume /128.
            $prefixLen = 128;
        } else {
            // Reject netmasks such as "foo" or "64x" instead of letting
            // the integer cast silently turn them into a shorter prefix.
            // An empty netmask keeps its historical meaning (zero).
            if (strspn($spec[1], '0123456789') !== strlen($spec[1])) {
                throw new \Erebot\InvalidValueException(
                    'Invalid netmask value ('.$spec[1].')'
                );
            }

            // An IPv4 netmask applies after the 96 bits
            // of the "::ffff:" mapping prefix.
            $prefixLen = ((int) $spec[1]) + ($isDottedAddress ? 96 : 0);
        }

        if ($prefixLen < 0 || $prefixLen > 128) {
            throw new \Erebot\InvalidValueException(
                'Invalid netmask value ('.$prefixLen.')'
            );
        }

        $address = self::canonicalizeHost(
            $address,
            \Erebot\Interfaces\Identity::CANON_IPV6,
            true
        );

        // A hostname (or a missing host) contains no ":" and cannot be
        // compared group by group with an IP address.
        if (strpos($thisHost, ':') === false) {
            return false;
        }

        // Compare both addresses 16 bits at a time, masking
        // the last group when the prefix ends inside it.
        $pattParts = explode(':', $address);
        $thisParts = explode(':', $thisHost);
        while ($prefixLen > 0) {
            $mask = 0x10000 - (1 << (16 - min($prefixLen, 16)));
            $pattValue = hexdec(array_shift($pattParts)) & $mask;
            $thisValue = hexdec(array_shift($thisParts)) & $mask;
            if ($pattValue != $thisValue) {
                return false;
            }
            $prefixLen -= 16;
        }
        return true;
    }

    /**
     * Turns a pattern using "?" and "*" wildcards into a regular expression.
     *
     * Every character other than "?" and "*" is matched literally.
     * The resulting expression is anchored and case-insensitive.
     *
     * @param string $pattern
     *      Pattern to convert.
     * @param bool $matchDot
     *      Whether wildcards may match a dot. When false, "?" and "*"
     *      stop at dots, unless the wildcard is followed by a "*"
     *      (i.e. "?*" or "**"), in which case the pair is consumed
     *      and matches any sequence of characters.
     *
     * @return string
     *      A regular expression usable with the preg_* functions.
     */
    protected static function patternize($pattern, $matchDot)
    {
        $anyChar = '.';
        $anyButDot = '[^\\.]';

        $regex = '';
        for ($i = 0, $len = strlen($pattern); $i < $len; $i++) {
            $char = $pattern[$i];
            if ($char !== '?' && $char !== '*') {
                $regex .= preg_quote($char, '#');
                continue;
            }

            if ($matchDot) {
                $regex .= $anyChar;
            } elseif (($i + 1) < $len && $pattern[$i + 1] == '*') {
                // "?*" or "**": consume the extra "*" too, so that
                // the test below sees it and adds the quantifier.
                $regex .= $anyChar;
                $i++;
            } else {
                $regex .= $anyButDot;
            }

            if ($pattern[$i] == '*') {
                $regex .= '*';
            }
        }
        return '#^'.$regex.'$#Di';
    }
}
static patternize($pattern, $matchDot)
Definition: Identity.php:457
$host
Host part for this user identity, either a string or null.
Definition: Identity.php:36
$nick
Nickname for this user identity, either a string or null.
Definition: Identity.php:30
__construct($user)
Definition: Identity.php:49
static stripLeading(&$number, $key)
Definition: Identity.php:122
static canonicalizeHost($host, $c10n, $uncompressed)
Definition: Identity.php:165
Represents the identity of an IRC user.
Definition: Identity.php:27
$ident
Identity string for this user identity, either a string or null.
Definition: Identity.php:33
match($pattern,\Erebot\Interfaces\IrcCollator $collator)
Definition: Identity.php:326