}
/**
- * RFC(2)822 Email Parser.
- *
- * By Cal Henderson <cal@iamcal.com>
- * This code is licensed under a Creative Commons Attribution-ShareAlike 2.5 License
- * http://creativecommons.org/licenses/by-sa/2.5/
- *
- * http://code.iamcal.com/php/rfc822/
- *
- * http://iamcal.com/publish/articles/php/parsing_email
- *
- * Revision 4
- *
* @param string $email An individual email address to validate.
*
* @return boolean
*/
function validateEmailAddress($email)
{
- static $comment_regexp, $email_regexp;
- if ($comment_regexp === null) {
- $this->_defineValidationRegexps($comment_regexp, $email_regexp);
- }
-
- // We need to strip comments first (repeat until we can't find
- // any more).
- while (true) {
- $new = preg_replace("!$comment_regexp!", '', $email);
- if (strlen($new) == strlen($email)){
- break;
- }
- $email = $new;
- }
-
- // Now match what's left.
- $result = (bool)preg_match("!^$email_regexp$!", $email);
+ $result = $this->_isRfc3696ValidEmailAddress($email);
if ($result && $this->_check_smtp) {
$result = $this->validateEmailAddressSmtp($email);
}
}
/**
- * RFC(2)822 Email Parser.
+ * RFC3696 Email Parser
*
* By Cal Henderson <cal@iamcal.com>
- * This code is licensed under a Creative Commons Attribution-ShareAlike 2.5 License
- * http://creativecommons.org/licenses/by-sa/2.5/
- *
- * http://code.iamcal.com/php/rfc822/
- *
- * http://iamcal.com/publish/articles/php/parsing_email
*
- * Revision 4
- *
- * @param string &$comment The regexp for comments.
- * @param string &$addr_spec The regexp for email addresses.
- */
- function _defineValidationRegexps(&$comment, &$addr_spec)
- {
- /**
- * NO-WS-CTL = %d1-8 / ; US-ASCII control characters
- * %d11 / ; that do not include the
- * %d12 / ; carriage return, line feed,
- * %d14-31 / ; and white space characters
- * %d127
- * ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
- * DIGIT = %x30-39
- */
+ * This code is dual licensed:
+ * CC Attribution-ShareAlike 2.5 - http://creativecommons.org/licenses/by-sa/2.5/
+ * GPLv3 - http://www.gnu.org/copyleft/gpl.html
+ */
+ protected function _isRfc3696ValidEmailAddress($email)
+ {
+ ####################################################################################
+ #
+ # NO-WS-CTL = %d1-8 / ; US-ASCII control characters
+ # %d11 / ; that do not include the
+ # %d12 / ; carriage return, line feed,
+ # %d14-31 / ; and white space characters
+ # %d127
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
+ # DIGIT = %x30-39
+
$no_ws_ctl = "[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]";
$alpha = "[\\x41-\\x5a\\x61-\\x7a]";
$digit = "[\\x30-\\x39]";
- $cr = "\\x0d";
- $lf = "\\x0a";
- $crlf = "($cr$lf)";
-
- /**
- * obs-char = %d0-9 / %d11 / ; %d0-127 except CR and
- * %d12 / %d14-127 ; LF
- * obs-text = *LF *CR *(obs-char *LF *CR)
- * text = %d1-9 / ; Characters excluding CR and LF
- * %d11 /
- * %d12 /
- * %d14-127 /
- * obs-text
- * obs-qp = "\" (%d0-127)
- * quoted-pair = ("\" text) / obs-qp
- */
- $obs_char = "[\\x00-\\x09\\x0b\\x0c\\x0e-\\x7f]";
- $obs_text = "($lf*$cr*($obs_char$lf*$cr*)*)";
- $text = "([\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f]|$obs_text)";
- $obs_qp = "(\\x5c[\\x00-\\x7f])";
- $quoted_pair = "(\\x5c$text|$obs_qp)";
-
- /**
- * obs-FWS = 1*WSP *(CRLF 1*WSP)
- * FWS = ([*WSP CRLF] 1*WSP) / ; Folding white space
- * obs-FWS
- * ctext = NO-WS-CTL / ; Non white space controls
- * %d33-39 / ; The rest of the US-ASCII
- * %d42-91 / ; characters not including "(",
- * %d93-126 ; ")", or "\"
- * ccontent = ctext / quoted-pair / comment
- * comment = "(" *([FWS] ccontent) [FWS] ")"
- * CFWS = *([FWS] comment) (([FWS] comment) / FWS)
- *
- * @note: We translate ccontent only partially to avoid an
- * infinite loop. Instead, we'll recursively strip comments
- * before processing the input.
- */
+ $cr = "\\x0d";
+ $lf = "\\x0a";
+ $crlf = "(?:$cr$lf)";
+
+
+ ####################################################################################
+ #
+ # obs-char = %d0-9 / %d11 / ; %d0-127 except CR and
+ # %d12 / %d14-127 ; LF
+ # obs-text = *LF *CR *(obs-char *LF *CR)
+ # text = %d1-9 / ; Characters excluding CR and LF
+ # %d11 /
+ # %d12 /
+ # %d14-127 /
+ # obs-text
+ # obs-qp = "\" (%d0-127)
+ # quoted-pair = ("\" text) / obs-qp
+
+ $obs_char = "[\\x00-\\x09\\x0b\\x0c\\x0e-\\x7f]";
+ $obs_text = "(?:$lf*$cr*(?:$obs_char$lf*$cr*)*)";
+ $text = "(?:[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f]|$obs_text)";
+
+ #
+ # there's an issue with the definition of 'text', since 'obs_text' can
+ # be blank and that allows qp's with no character after the slash. we're
+ # treating that as bad, so this just checks we have at least one
+ # (non-CRLF) character
+ #
+
+ $text = "(?:$lf*$cr*$obs_char$lf*$cr*)";
+ $obs_qp = "(?:\\x5c[\\x00-\\x7f])";
+ $quoted_pair = "(?:\\x5c$text|$obs_qp)";
+
+
+ ####################################################################################
+ #
+ # obs-FWS = 1*WSP *(CRLF 1*WSP)
+ # FWS = ([*WSP CRLF] 1*WSP) / ; Folding white space
+ # obs-FWS
+ # ctext = NO-WS-CTL / ; Non white space controls
+ # %d33-39 / ; The rest of the US-ASCII
+ # %d42-91 / ; characters not including "(",
+ # %d93-126 ; ")", or "\"
+ # ccontent = ctext / quoted-pair / comment
+ # comment = "(" *([FWS] ccontent) [FWS] ")"
+ # CFWS = *([FWS] comment) (([FWS] comment) / FWS)
+
+ #
+ # note: we translate ccontent only partially to avoid an infinite loop
+ # instead, we'll recursively strip *nested* comments before processing
+ # the input. that will leave 'plain old comments' to be matched during
+ # the main parse.
+ #
+
$wsp = "[\\x20\\x09]";
- $obs_fws = "($wsp+($crlf$wsp+)*)";
- $fws = "((($wsp*$crlf)?$wsp+)|$obs_fws)";
- $ctext = "($no_ws_ctl|[\\x21-\\x27\\x2A-\\x5b\\x5d-\\x7e])";
- $ccontent = "($ctext|$quoted_pair)";
- $comment = "(\\x28($fws?$ccontent)*$fws?\\x29)";
- $cfws = "(($fws?$comment)*($fws?$comment|$fws))";
- $cfws = "$fws*";
-
- /**
- * atext = ALPHA / DIGIT / ; Any character except controls,
- * "!" / "#" / ; SP, and specials.
- * "$" / "%" / ; Used for atoms
- * "&" / "'" /
- * "*" / "+" /
- * "-" / "/" /
- * "=" / "?" /
- * "^" / "_" /
- * "`" / "{" /
- * "|" / "}" /
- * "~"
- * atom = [CFWS] 1*atext [CFWS]
- */
- $atext = "($alpha|$digit|[\\x21\\x23-\\x27\\x2a\\x2b\\x2d\\x2e\\x3d\\x3f\\x5e\\x5f\\x60\\x7b-\\x7e])";
- $atom = "($cfws?$atext+$cfws?)";
-
- /**
- * qtext = NO-WS-CTL / ; Non white space controls
- * %d33 / ; The rest of the US-ASCII
- * %d35-91 / ; characters not including "\"
- * %d93-126 ; or the quote character
- * qcontent = qtext / quoted-pair
- * quoted-string = [CFWS]
- * DQUOTE *([FWS] qcontent) [FWS] DQUOTE
- * [CFWS]
- * word = atom / quoted-string
- */
- $qtext = "($no_ws_ctl|[\\x21\\x23-\\x5b\\x5d-\\x7e])";
- $qcontent = "($qtext|$quoted_pair)";
- $quoted_string = "($cfws?\\x22($fws?$qcontent)*$fws?\\x22$cfws?)";
- $word = "($atom|$quoted_string)";
-
- /**
- * obs-local-part = word *("." word)
- * obs-domain = atom *("." atom)
- */
- $obs_local_part = "($word(\\x2e$word)*)";
- $obs_domain = "($atom(\\x2e$atom)*)";
-
- /**
- * dot-atom-text = 1*atext *("." 1*atext)
- * dot-atom = [CFWS] dot-atom-text [CFWS]
- */
- $dot_atom_text = "($atext+(\\x2e$atext+)*)";
- $dot_atom = "($cfws?$dot_atom_text$cfws?)";
-
- /**
- * domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
- * dcontent = dtext / quoted-pair
- * dtext = NO-WS-CTL / ; Non white space controls
- *
- * %d33-90 / ; The rest of the US-ASCII
- * %d94-126 ; characters not including "[",
- * ; "]", or "\"
- */
- $dtext = "($no_ws_ctl|[\\x21-\\x5a\\x5e-\\x7e])";
- $dcontent = "($dtext|$quoted_pair)";
- $domain_literal = "($cfws?\\x5b($fws?$dcontent)*$fws?\\x5d$cfws?)";
-
- /**
- * local-part = dot-atom / quoted-string / obs-local-part
- * domain = dot-atom / domain-literal / obs-domain
- * addr-spec = local-part "@" domain
- */
- $local_part = "($dot_atom|$quoted_string|$obs_local_part)";
- $domain = "($dot_atom|$domain_literal|$obs_domain)";
- $addr_spec = "($local_part\\x40$domain)";
+ $obs_fws = "(?:$wsp+(?:$crlf$wsp+)*)";
+ $fws = "(?:(?:(?:$wsp*$crlf)?$wsp+)|$obs_fws)";
+ $ctext = "(?:$no_ws_ctl|[\\x21-\\x27\\x2A-\\x5b\\x5d-\\x7e])";
+ $ccontent = "(?:$ctext|$quoted_pair)";
+ $comment = "(?:\\x28(?:$fws?$ccontent)*$fws?\\x29)";
+ $cfws = "(?:(?:$fws?$comment)*(?:$fws?$comment|$fws))";
+
+
+ #
+ # these are the rules for removing *nested* comments. we'll just detect
+ # outer comment and replace it with an empty comment, and recurse until
+ # we stop.
+ #
+
+ $outer_ccontent_dull = "(?:$fws?$ctext|$quoted_pair)";
+ $outer_ccontent_nest = "(?:$fws?$comment)";
+ $outer_comment = "(?:\\x28$outer_ccontent_dull*(?:$outer_ccontent_nest$outer_ccontent_dull*)+$fws?\\x29)";
+
+
+ ####################################################################################
+ #
+ # atext = ALPHA / DIGIT / ; Any character except controls,
+ # "!" / "#" / ; SP, and specials.
+ # "$" / "%" / ; Used for atoms
+ # "&" / "'" /
+ # "*" / "+" /
+ # "-" / "/" /
+ # "=" / "?" /
+ # "^" / "_" /
+ # "`" / "{" /
+ # "|" / "}" /
+ # "~"
+ # atom = [CFWS] 1*atext [CFWS]
+
+ $atext = "(?:$alpha|$digit|[\\x21\\x23-\\x27\\x2a\\x2b\\x2d\\x2f\\x3d\\x3f\\x5e\\x5f\\x60\\x7b-\\x7e])";
+ $atom = "(?:$cfws?(?:$atext)+$cfws?)";
+
+
+ ####################################################################################
+ #
+ # qtext = NO-WS-CTL / ; Non white space controls
+ # %d33 / ; The rest of the US-ASCII
+ # %d35-91 / ; characters not including "\"
+ # %d93-126 ; or the quote character
+ # qcontent = qtext / quoted-pair
+ # quoted-string = [CFWS]
+ # DQUOTE *([FWS] qcontent) [FWS] DQUOTE
+ # [CFWS]
+ # word = atom / quoted-string
+
+ $qtext = "(?:$no_ws_ctl|[\\x21\\x23-\\x5b\\x5d-\\x7e])";
+ $qcontent = "(?:$qtext|$quoted_pair)";
+ $quoted_string = "(?:$cfws?\\x22(?:$fws?$qcontent)*$fws?\\x22$cfws?)";
+
+ #
+ # changed the '*' to a '+' to require that quoted strings are not empty
+ #
+
+ $quoted_string = "(?:$cfws?\\x22(?:$fws?$qcontent)+$fws?\\x22$cfws?)";
+ $word = "(?:$atom|$quoted_string)";
+
+
+ ####################################################################################
+ #
+ # obs-local-part = word *("." word)
+ # obs-domain = atom *("." atom)
+
+ $obs_local_part = "(?:$word(?:\\x2e$word)*)";
+ $obs_domain = "(?:$atom(?:\\x2e$atom)*)";
+
+
+ ####################################################################################
+ #
+ # dot-atom-text = 1*atext *("." 1*atext)
+ # dot-atom = [CFWS] dot-atom-text [CFWS]
+
+ $dot_atom_text = "(?:$atext+(?:\\x2e$atext+)*)";
+ $dot_atom = "(?:$cfws?$dot_atom_text$cfws?)";
+
+
+ ####################################################################################
+ #
+ # domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
+ # dcontent = dtext / quoted-pair
+ # dtext = NO-WS-CTL / ; Non white space controls
+ #
+ # %d33-90 / ; The rest of the US-ASCII
+ # %d94-126 ; characters not including "[",
+ # ; "]", or "\"
+
+ $dtext = "(?:$no_ws_ctl|[\\x21-\\x5a\\x5e-\\x7e])";
+ $dcontent = "(?:$dtext|$quoted_pair)";
+ $domain_literal = "(?:$cfws?\\x5b(?:$fws?$dcontent)*$fws?\\x5d$cfws?)";
+
+
+ ####################################################################################
+ #
+ # local-part = dot-atom / quoted-string / obs-local-part
+ # domain = dot-atom / domain-literal / obs-domain
+ # addr-spec = local-part "@" domain
+
+ $local_part = "(($dot_atom)|($quoted_string)|($obs_local_part))";
+ $domain = "(($dot_atom)|($domain_literal)|($obs_domain))";
+ $addr_spec = "$local_part\\x40$domain";
+
+
+
+ #
+ # see http://www.dominicsayers.com/isemail/ for details, but this should probably be 254
+ #
+
+ if (strlen($email) > 256) return 0;
+
+
+ #
+ # we need to strip nested comments first - we replace them with a simple comment
+ #
+
+ $email = $this->_rfc3696StripComments($outer_comment, $email, "(x)");
+
+
+ #
+ # now match what's left
+ #
+
+ if (!preg_match("!^$addr_spec$!", $email, $m)){
+
+ return 0;
+ }
+
+ $bits = array(
+ 'local' => isset($m[1]) ? $m[1] : '',
+ 'local-atom' => isset($m[2]) ? $m[2] : '',
+ 'local-quoted' => isset($m[3]) ? $m[3] : '',
+ 'local-obs' => isset($m[4]) ? $m[4] : '',
+ 'domain' => isset($m[5]) ? $m[5] : '',
+ 'domain-atom' => isset($m[6]) ? $m[6] : '',
+ 'domain-literal' => isset($m[7]) ? $m[7] : '',
+ 'domain-obs' => isset($m[8]) ? $m[8] : '',
+ );
+
+
+ #
+ # we need to now strip comments from $bits[local] and $bits[domain],
+ # since we know they're i the right place and we want them out of the
+ # way for checking IPs, label sizes, etc
+ #
+
+ $bits['local'] = $this->_rfc3696StripComments($comment, $bits['local']);
+ $bits['domain'] = $this->_rfc3696StripComments($comment, $bits['domain']);
+
+
+ #
+ # length limits on segments
+ #
+
+ if (strlen($bits['local']) > 64) return 0;
+ if (strlen($bits['domain']) > 255) return 0;
+
+
+ #
+ # restrictuions on domain-literals from RFC2821 section 4.1.3
+ #
+
+ if (strlen($bits['domain-literal'])){
+
+ $Snum = "(\d{1,3})";
+ $IPv4_address_literal = "$Snum\.$Snum\.$Snum\.$Snum";
+
+ $IPv6_hex = "(?:[0-9a-fA-F]{1,4})";
+
+ $IPv6_full = "IPv6\:$IPv6_hex(:?\:$IPv6_hex){7}";
+
+ $IPv6_comp_part = "(?:$IPv6_hex(?:\:$IPv6_hex){0,5})?";
+ $IPv6_comp = "IPv6\:($IPv6_comp_part\:\:$IPv6_comp_part)";
+
+ $IPv6v4_full = "IPv6\:$IPv6_hex(?:\:$IPv6_hex){5}\:$IPv4_address_literal";
+
+ $IPv6v4_comp_part = "$IPv6_hex(?:\:$IPv6_hex){0,3}";
+ $IPv6v4_comp = "IPv6\:((?:$IPv6v4_comp_part)?\:\:(?:$IPv6v4_comp_part\:)?)$IPv4_address_literal";
+
+
+ #
+ # IPv4 is simple
+ #
+
+ if (preg_match("!^\[$IPv4_address_literal\]$!", $bits['domain'], $m)) {
+ if (intval($m[1]) > 255) return 0;
+ if (intval($m[2]) > 255) return 0;
+ if (intval($m[3]) > 255) return 0;
+ if (intval($m[4]) > 255) return 0;
+ } else {
+ #
+ # this should be IPv6 - a bunch of tests are needed here :)
+ #
+
+ while (1) {
+
+ if (preg_match("!^\[$IPv6_full\]$!", $bits['domain'])){
+ break;
+ }
+
+ if (preg_match("!^\[$IPv6_comp\]$!", $bits['domain'], $m)){
+ list($a, $b) = explode('::', $m[1]);
+ $folded = (strlen($a) && strlen($b)) ? "$a:$b" : "$a$b";
+ $groups = explode(':', $folded);
+ if (count($groups) > 6) return 0;
+ break;
+ }
+
+ if (preg_match("!^\[$IPv6v4_full\]$!", $bits['domain'], $m)) {
+ if (intval($m[1]) > 255) return 0;
+ if (intval($m[2]) > 255) return 0;
+ if (intval($m[3]) > 255) return 0;
+ if (intval($m[4]) > 255) return 0;
+ break;
+ }
+
+ if (preg_match("!^\[$IPv6v4_comp\]$!", $bits['domain'], $m)) {
+ list($a, $b) = explode('::', $m[1]);
+ $b = substr($b, 0, -1); # remove the trailing colon before the IPv4 address
+ $folded = (strlen($a) && strlen($b)) ? "$a:$b" : "$a$b";
+ $groups = explode(':', $folded);
+ if (count($groups) > 4) return 0;
+ break;
+ }
+
+ return 0;
+ }
+ }
+ } else {
+ #
+ # the domain is either dot-atom or obs-domain - either way, it's
+ # made up of simple labels and we split on dots
+ #
+
+ $labels = explode('.', $bits['domain']);
+
+
+ #
+ # this is allowed by both dot-atom and obs-domain, but is un-routeable on the
+ # public internet, so we'll fail it (e.g. user@localhost)
+ #
+
+ if (count($labels) == 1) return 0;
+
+
+ #
+ # checks on each label
+ #
+
+ foreach ($labels as $label) {
+ if (strlen($label) > 63) return 0;
+ if (substr($label, 0, 1) == '-') return 0;
+ if (substr($label, -1) == '-') return 0;
+ }
+
+
+ #
+ # last label can't be all numeric
+ #
+
+ if (preg_match('!^[0-9]+$!', array_pop($labels))) return 0;
+ }
+
+ return 1;
}
+ /**
+ * RFC3696 Email Parser
+ *
+ * By Cal Henderson <cal@iamcal.com>
+ *
+ * This code is dual licensed:
+ * CC Attribution-ShareAlike 2.5 - http://creativecommons.org/licenses/by-sa/2.5/
+ * GPLv3 - http://www.gnu.org/copyleft/gpl.html
+ *
+ * $Revision: 5039 $
+ */
+ protected function _rfc3696StripComments($comment, $email, $replace = '')
+ {
+ while (1) {
+ $new = preg_replace("!$comment!", $replace, $email);
+ if (strlen($new) == strlen($email)) {
+ return $email;
+ }
+ $email = $new;
+ }
+ }
}
class Horde_Form_Type_matrix extends Horde_Form_Type {