package votorola.g.mail; // Copyright 2008-2009, Les Hazlewood, Michael Allan. Modified from http://www.leshazlewood.com/?p=5. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. A copy of the License is included at votorola/_/licence/Apache-2.0.txt. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

import java.util.regex.Pattern;
import votorola.g.lang.*;


/** Validation of email addresses using a regular expression pattern.  The pattern is
  * assembled from named fragments that follow the grammar productions of RFC 2822
  * (address syntax) and RFC 1035 (domain names).
  *
  *     @see <a href="https://www.ietf.org/rfc/rfc822.txt">RFC 822</a>
  */
@ThreadSafe class AddressValidationP
{

    private AddressValidationP() {}



    /** Constructs the pattern of a valid email address consisting of the bare addr-spec
      * having no personal part and no angle braces, and with no domain literals (like
      * joe@[192.168.1.100]).
      *
      * <p>A new pattern is compiled on every call; callers that validate repeatedly
      * should keep the returned instance and reuse it.</p>
      *
      *     @return a newly compiled pattern, intended for whole-string matching
      */
    static Pattern newPattern()
    {
        // Whether domain literals are accepted in the address, e.g.:
        //
        //     someone@[192.168.1.100] or
        //     john.doe@[23:33:A2:22:16:1F] or
        //     me@[my computer]
        //
        // The RFC says these are valid email addresses, but most people don't like
        // allowing them.  Setting this to true would be the RFC 2822 compliant choice;
        // it is false here so that only ordinary domain names (RFC 1035, x.y.z.com,
        // etc.) are accepted.
        final boolean ALLOW_DOMAIN_LITERALS = false;

        // Whether quoted identifiers are accepted, that is a display name and angle
        // brackets around the raw address, e.g.:
        //
        //     "John Smith" <john.smith@somewhere.com>
        //
        // The RFC says this is a valid mailbox.  Setting this to true would be the
        // RFC 2822 compliant choice; it is false here so that only a raw address
        // (john.smith@somewhere.com - no quotes or angle brackets) is accepted.
        final boolean ALLOW_QUOTED_IDENTIFIERS = false;

        // RFC 2822 2.2.2 Structured Header Field Bodies
        final String wsp = "[ \\t]"; // space or tab
        final String fwsp = wsp + "*";

        // RFC 2822 3.2.1 Primitive tokens
        final String dquote = "\\\"";
        // ASCII control characters excluding white space:
        final String noWsCtl = "\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F";
        // all ASCII characters except CR and LF:
        final String asciiText = "[\\x01-\\x09\\x0B\\x0C\\x0E-\\x7F]";

        // RFC 2822 3.2.2 Quoted characters:
        // single backslash followed by a text char
        final String quotedPair = "(\\\\" + asciiText + ")";

        // RFC 2822 3.2.4 Atom:
        final String atext = "[a-zA-Z0-9\\!\\#\\$\\%\\&\\'\\*\\+\\-\\/\\=\\?\\^\\_\\`\\{\\|\\}\\~]";
        final String atom = fwsp + atext + "+" + fwsp;
        final String dotAtomText = atext + "+" + "(" + "\\." + atext + "+)*";
        final String dotAtom = fwsp + "(" + dotAtomText + ")" + fwsp;

        // RFC 2822 3.2.5 Quoted strings:
        // noWsCtl and the rest of ASCII except the doublequote and backslash characters:
        final String qtext = "[" + noWsCtl + "\\x21\\x23-\\x5B\\x5D-\\x7E]";
        final String qcontent = "(" + qtext + "|" + quotedPair + ")";
        final String quotedString = dquote + "(" + fwsp + qcontent + ")*" + fwsp + dquote;

        // RFC 2822 3.2.6 Miscellaneous tokens
        final String word = "((" + atom + ")|(" + quotedString + "))";
        final String phrase = word + "+"; // one or more words

        // RFC 1035 tokens for domain names:
        final String letter = "[a-zA-Z]";
        final String letDig = "[a-zA-Z0-9]";
        final String letDigHyp = "[a-zA-Z0-9-]";
        // a label is 1 to 63 characters, beginning and ending with a letter or digit
        final String rfcLabel = letDig + "(" + letDigHyp + "{0,61}" + letDig + ")?";
        // Top-level label.  It was formerly restricted to 2-6 letters, which rejected
        // the longer generic TLDs and all internationalized ones.  Now it is either 2 to
        // 63 letters (63 being the label size limit), or an IDN A-label: "xn--" followed
        // by at most 59 further characters, the last a letter or digit.  The groups here
        // are non-capturing, so the numbering of the other groups is undisturbed.
        final String idnALabel = "[xX][nN]--" + letDigHyp + "{0,58}" + letDig;
        final String topLabel = "(?:" + letter + "{2,63}|" + idnALabel + ")";
        final String rfc1035DomainName = rfcLabel + "(\\." + rfcLabel + ")*\\." + topLabel;

        // RFC 2822 3.4 Address specification
        // domain text - non white space controls and the rest of ASCII chars not
        // including [, ], or \:
        final String dtext = "[" + noWsCtl + "\\x21-\\x5A\\x5E-\\x7E]";
        // Grouped, so that the alternation stays intact when spliced into domainLiteral
        // below.  Ungrouped, the "|" would split the enclosing group and the trailing
        // "+" would bind to quotedPair alone.
        final String dcontent = "(?:" + dtext + "|" + quotedPair + ")";
        final String domainLiteral = "\\[" + "(" + fwsp + dcontent + "+)*" + fwsp + "\\]";
        final String rfc2822Domain = "(" + dotAtom + "|" + domainLiteral + ")";

        final String domain = ALLOW_DOMAIN_LITERALS ? rfc2822Domain : rfc1035DomainName;

        final String localPart = "((" + dotAtom + ")|(" + quotedString + "))";
        final String addrSpec = localPart + "@" + domain;
        final String angleAddr = "<" + addrSpec + ">";
        final String nameAddr = "(" + phrase + ")?" + fwsp + angleAddr;
        final String mailbox = nameAddr + "|" + addrSpec;

        // choose the full mailbox form or the bare addr-spec, then compile:
        final String patternString = ALLOW_QUOTED_IDENTIFIERS ? mailbox : addrSpec;
        return Pattern.compile( patternString );
    }


}