DomainPart.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. <?php
  2. namespace Egulias\EmailValidator\Parser;
  3. use Egulias\EmailValidator\EmailLexer;
  4. use Egulias\EmailValidator\Exception\CharNotAllowed;
  5. use Egulias\EmailValidator\Exception\CommaInDomain;
  6. use Egulias\EmailValidator\Exception\ConsecutiveAt;
  7. use Egulias\EmailValidator\Exception\CRLFAtTheEnd;
  8. use Egulias\EmailValidator\Exception\CRNoLF;
  9. use Egulias\EmailValidator\Exception\DomainHyphened;
  10. use Egulias\EmailValidator\Exception\DotAtEnd;
  11. use Egulias\EmailValidator\Exception\DotAtStart;
  12. use Egulias\EmailValidator\Exception\ExpectingATEXT;
  13. use Egulias\EmailValidator\Exception\ExpectingDomainLiteralClose;
  14. use Egulias\EmailValidator\Exception\ExpectingDTEXT;
  15. use Egulias\EmailValidator\Exception\NoDomainPart;
  16. use Egulias\EmailValidator\Exception\UnopenedComment;
  17. use Egulias\EmailValidator\Warning\AddressLiteral;
  18. use Egulias\EmailValidator\Warning\CFWSWithFWS;
  19. use Egulias\EmailValidator\Warning\DeprecatedComment;
  20. use Egulias\EmailValidator\Warning\DomainLiteral;
  21. use Egulias\EmailValidator\Warning\DomainTooLong;
  22. use Egulias\EmailValidator\Warning\IPV6BadChar;
  23. use Egulias\EmailValidator\Warning\IPV6ColonEnd;
  24. use Egulias\EmailValidator\Warning\IPV6ColonStart;
  25. use Egulias\EmailValidator\Warning\IPV6Deprecated;
  26. use Egulias\EmailValidator\Warning\IPV6DoubleColon;
  27. use Egulias\EmailValidator\Warning\IPV6GroupCount;
  28. use Egulias\EmailValidator\Warning\IPV6MaxGroups;
  29. use Egulias\EmailValidator\Warning\LabelTooLong;
  30. use Egulias\EmailValidator\Warning\ObsoleteDTEXT;
  31. use Egulias\EmailValidator\Warning\TLD;
  32. class DomainPart extends Parser
  33. {
  34. const DOMAIN_MAX_LENGTH = 254;
  35. protected $domainPart = '';
  36. public function parse($domainPart)
  37. {
  38. $this->lexer->moveNext();
  39. $this->performDomainStartChecks();
  40. $domain = $this->doParseDomainPart();
  41. $prev = $this->lexer->getPrevious();
  42. $length = strlen($domain);
  43. if ($prev['type'] === EmailLexer::S_DOT) {
  44. throw new DotAtEnd();
  45. }
  46. if ($prev['type'] === EmailLexer::S_HYPHEN) {
  47. throw new DomainHyphened();
  48. }
  49. if ($length > self::DOMAIN_MAX_LENGTH) {
  50. $this->warnings[DomainTooLong::CODE] = new DomainTooLong();
  51. }
  52. if ($prev['type'] === EmailLexer::S_CR) {
  53. throw new CRLFAtTheEnd();
  54. }
  55. $this->domainPart = $domain;
  56. }
  57. private function performDomainStartChecks()
  58. {
  59. $this->checkInvalidTokensAfterAT();
  60. $this->checkEmptyDomain();
  61. if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
  62. $this->warnings[DeprecatedComment::CODE] = new DeprecatedComment();
  63. $this->parseDomainComments();
  64. }
  65. }
  66. private function checkEmptyDomain()
  67. {
  68. $thereIsNoDomain = $this->lexer->token['type'] === EmailLexer::S_EMPTY ||
  69. ($this->lexer->token['type'] === EmailLexer::S_SP &&
  70. !$this->lexer->isNextToken(EmailLexer::GENERIC));
  71. if ($thereIsNoDomain) {
  72. throw new NoDomainPart();
  73. }
  74. }
  75. private function checkInvalidTokensAfterAT()
  76. {
  77. if ($this->lexer->token['type'] === EmailLexer::S_DOT) {
  78. throw new DotAtStart();
  79. }
  80. if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN) {
  81. throw new DomainHyphened();
  82. }
  83. }
  84. public function getDomainPart()
  85. {
  86. return $this->domainPart;
  87. }
  88. public function checkIPV6Tag($addressLiteral, $maxGroups = 8)
  89. {
  90. $prev = $this->lexer->getPrevious();
  91. if ($prev['type'] === EmailLexer::S_COLON) {
  92. $this->warnings[IPV6ColonEnd::CODE] = new IPV6ColonEnd();
  93. }
  94. $IPv6 = substr($addressLiteral, 5);
  95. //Daniel Marschall's new IPv6 testing strategy
  96. $matchesIP = explode(':', $IPv6);
  97. $groupCount = count($matchesIP);
  98. $colons = strpos($IPv6, '::');
  99. if (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT)) !== 0) {
  100. $this->warnings[IPV6BadChar::CODE] = new IPV6BadChar();
  101. }
  102. if ($colons === false) {
  103. // We need exactly the right number of groups
  104. if ($groupCount !== $maxGroups) {
  105. $this->warnings[IPV6GroupCount::CODE] = new IPV6GroupCount();
  106. }
  107. return;
  108. }
  109. if ($colons !== strrpos($IPv6, '::')) {
  110. $this->warnings[IPV6DoubleColon::CODE] = new IPV6DoubleColon();
  111. return;
  112. }
  113. if ($colons === 0 || $colons === (strlen($IPv6) - 2)) {
  114. // RFC 4291 allows :: at the start or end of an address
  115. //with 7 other groups in addition
  116. ++$maxGroups;
  117. }
  118. if ($groupCount > $maxGroups) {
  119. $this->warnings[IPV6MaxGroups::CODE] = new IPV6MaxGroups();
  120. } elseif ($groupCount === $maxGroups) {
  121. $this->warnings[IPV6Deprecated::CODE] = new IPV6Deprecated();
  122. }
  123. }
  124. protected function doParseDomainPart()
  125. {
  126. $domain = '';
  127. $openedParenthesis = 0;
  128. do {
  129. $prev = $this->lexer->getPrevious();
  130. $this->checkNotAllowedChars($this->lexer->token);
  131. if ($this->lexer->token['type'] === EmailLexer::S_OPENPARENTHESIS) {
  132. $this->parseComments();
  133. $openedParenthesis += $this->getOpenedParenthesis();
  134. $this->lexer->moveNext();
  135. $tmpPrev = $this->lexer->getPrevious();
  136. if ($tmpPrev['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
  137. $openedParenthesis--;
  138. }
  139. }
  140. if ($this->lexer->token['type'] === EmailLexer::S_CLOSEPARENTHESIS) {
  141. if ($openedParenthesis === 0) {
  142. throw new UnopenedComment();
  143. } else {
  144. $openedParenthesis--;
  145. }
  146. }
  147. $this->checkConsecutiveDots();
  148. $this->checkDomainPartExceptions($prev);
  149. if ($this->hasBrackets()) {
  150. $this->parseDomainLiteral();
  151. }
  152. $this->checkLabelLength($prev);
  153. if ($this->isFWS()) {
  154. $this->parseFWS();
  155. }
  156. $domain .= $this->lexer->token['value'];
  157. $this->lexer->moveNext();
  158. } while (null !== $this->lexer->token['type']);
  159. return $domain;
  160. }
  161. private function checkNotAllowedChars($token)
  162. {
  163. $notAllowed = [EmailLexer::S_BACKSLASH => true, EmailLexer::S_SLASH=> true];
  164. if (isset($notAllowed[$token['type']])) {
  165. throw new CharNotAllowed();
  166. }
  167. }
  168. protected function parseDomainLiteral()
  169. {
  170. if ($this->lexer->isNextToken(EmailLexer::S_COLON)) {
  171. $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
  172. }
  173. if ($this->lexer->isNextToken(EmailLexer::S_IPV6TAG)) {
  174. $lexer = clone $this->lexer;
  175. $lexer->moveNext();
  176. if ($lexer->isNextToken(EmailLexer::S_DOUBLECOLON)) {
  177. $this->warnings[IPV6ColonStart::CODE] = new IPV6ColonStart();
  178. }
  179. }
  180. return $this->doParseDomainLiteral();
  181. }
  182. protected function doParseDomainLiteral()
  183. {
  184. $IPv6TAG = false;
  185. $addressLiteral = '';
  186. do {
  187. if ($this->lexer->token['type'] === EmailLexer::C_NUL) {
  188. throw new ExpectingDTEXT();
  189. }
  190. if ($this->lexer->token['type'] === EmailLexer::INVALID ||
  191. $this->lexer->token['type'] === EmailLexer::C_DEL ||
  192. $this->lexer->token['type'] === EmailLexer::S_LF
  193. ) {
  194. $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
  195. }
  196. if ($this->lexer->isNextTokenAny(array(EmailLexer::S_OPENQBRACKET, EmailLexer::S_OPENBRACKET))) {
  197. throw new ExpectingDTEXT();
  198. }
  199. if ($this->lexer->isNextTokenAny(
  200. array(EmailLexer::S_HTAB, EmailLexer::S_SP, $this->lexer->token['type'] === EmailLexer::CRLF)
  201. )) {
  202. $this->warnings[CFWSWithFWS::CODE] = new CFWSWithFWS();
  203. $this->parseFWS();
  204. }
  205. if ($this->lexer->isNextToken(EmailLexer::S_CR)) {
  206. throw new CRNoLF();
  207. }
  208. if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH) {
  209. $this->warnings[ObsoleteDTEXT::CODE] = new ObsoleteDTEXT();
  210. $addressLiteral .= $this->lexer->token['value'];
  211. $this->lexer->moveNext();
  212. $this->validateQuotedPair();
  213. }
  214. if ($this->lexer->token['type'] === EmailLexer::S_IPV6TAG) {
  215. $IPv6TAG = true;
  216. }
  217. if ($this->lexer->token['type'] === EmailLexer::S_CLOSEQBRACKET) {
  218. break;
  219. }
  220. $addressLiteral .= $this->lexer->token['value'];
  221. } while ($this->lexer->moveNext());
  222. $addressLiteral = str_replace('[', '', $addressLiteral);
  223. $addressLiteral = $this->checkIPV4Tag($addressLiteral);
  224. if (false === $addressLiteral) {
  225. return $addressLiteral;
  226. }
  227. if (!$IPv6TAG) {
  228. $this->warnings[DomainLiteral::CODE] = new DomainLiteral();
  229. return $addressLiteral;
  230. }
  231. $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
  232. $this->checkIPV6Tag($addressLiteral);
  233. return $addressLiteral;
  234. }
  235. protected function checkIPV4Tag($addressLiteral)
  236. {
  237. $matchesIP = array();
  238. // Extract IPv4 part from the end of the address-literal (if there is one)
  239. if (preg_match(
  240. '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
  241. $addressLiteral,
  242. $matchesIP
  243. ) > 0
  244. ) {
  245. $index = strrpos($addressLiteral, $matchesIP[0]);
  246. if ($index === 0) {
  247. $this->warnings[AddressLiteral::CODE] = new AddressLiteral();
  248. return false;
  249. }
  250. // Convert IPv4 part to IPv6 format for further testing
  251. $addressLiteral = substr($addressLiteral, 0, $index) . '0:0';
  252. }
  253. return $addressLiteral;
  254. }
  255. protected function checkDomainPartExceptions($prev)
  256. {
  257. $invalidDomainTokens = array(
  258. EmailLexer::S_DQUOTE => true,
  259. EmailLexer::S_SEMICOLON => true,
  260. EmailLexer::S_GREATERTHAN => true,
  261. EmailLexer::S_LOWERTHAN => true,
  262. );
  263. if (isset($invalidDomainTokens[$this->lexer->token['type']])) {
  264. throw new ExpectingATEXT();
  265. }
  266. if ($this->lexer->token['type'] === EmailLexer::S_COMMA) {
  267. throw new CommaInDomain();
  268. }
  269. if ($this->lexer->token['type'] === EmailLexer::S_AT) {
  270. throw new ConsecutiveAt();
  271. }
  272. if ($this->lexer->token['type'] === EmailLexer::S_OPENQBRACKET && $prev['type'] !== EmailLexer::S_AT) {
  273. throw new ExpectingATEXT();
  274. }
  275. if ($this->lexer->token['type'] === EmailLexer::S_HYPHEN && $this->lexer->isNextToken(EmailLexer::S_DOT)) {
  276. throw new DomainHyphened();
  277. }
  278. if ($this->lexer->token['type'] === EmailLexer::S_BACKSLASH
  279. && $this->lexer->isNextToken(EmailLexer::GENERIC)) {
  280. throw new ExpectingATEXT();
  281. }
  282. }
  283. protected function hasBrackets()
  284. {
  285. if ($this->lexer->token['type'] !== EmailLexer::S_OPENBRACKET) {
  286. return false;
  287. }
  288. try {
  289. $this->lexer->find(EmailLexer::S_CLOSEBRACKET);
  290. } catch (\RuntimeException $e) {
  291. throw new ExpectingDomainLiteralClose();
  292. }
  293. return true;
  294. }
  295. protected function checkLabelLength($prev)
  296. {
  297. if ($this->lexer->token['type'] === EmailLexer::S_DOT &&
  298. $prev['type'] === EmailLexer::GENERIC &&
  299. strlen($prev['value']) > 63
  300. ) {
  301. $this->warnings[LabelTooLong::CODE] = new LabelTooLong();
  302. }
  303. }
  304. protected function parseDomainComments()
  305. {
  306. $this->isUnclosedComment();
  307. while (!$this->lexer->isNextToken(EmailLexer::S_CLOSEPARENTHESIS)) {
  308. $this->warnEscaping();
  309. $this->lexer->moveNext();
  310. }
  311. $this->lexer->moveNext();
  312. if ($this->lexer->isNextToken(EmailLexer::S_DOT)) {
  313. throw new ExpectingATEXT();
  314. }
  315. }
  316. protected function addTLDWarnings()
  317. {
  318. if ($this->warnings[DomainLiteral::CODE]) {
  319. $this->warnings[TLD::CODE] = new TLD();
  320. }
  321. }
  322. }