<?php
/**
 * EUriValidator class file
 *
 * @author MetaYii
 * @version 1.0
 * @link http://www.yiiframework.com/
 * @copyright Copyright &copy; 2008 MetaYii
 * @license modified BSD, see below:
 *
 * Copyright © 2008 by MetaYii. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * - Neither the name of MetaYii nor the names of its contributors may
 *   be used to endorse or promote products derived from this software without
 *   specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 *------------------------------------------------------------------------------
 * 
 * About the RFCs:
 * 
 * The RFC 2396 is Copyright (C) The Internet Society (1998). All Rights
 * Reserved. See the other RFCs linked below for their respective copyright
 * notices and credits.
 */

/**
 * EUriValidator is a validator which uses regular expressions and other checks
 * to validate a URI according to RFC 3986 and related RFCs, and to see whether
 * it is well formed.
 *
 * Important notes:
 * - This validator works only for absolute URIs.
 * - URIs are a bit tortuous, so be careful even when validated. This includes
 *   (but is not limited to) SQL injections, XSS, code injections or any other
 *   security risk involving a user passing parameters to your script, since
 *   those checks are out of the scope of the class. This class just does a raw
 *   check of the well-formedness of the URI, and nothing else.
 *
 * References:
 *
 * @link http://en.wikipedia.org/wiki/URI
 * @link http://en.wikipedia.org/wiki/URI_scheme
 * @link http://www.iana.org/assignments/uri-schemes.html
 * @link http://www.ietf.org/rfc/rfc3986.txt
 * @link http://www.ietf.org/rfc/rfc4395.txt
 * @link http://www.ietf.org/rfc/rfc2616.txt
 * @link http://www.ietf.org/rfc/rfc4395.txt
 * @link http://snipplr.com/view/6889/regular-expressions-for-uri-validationparsing/
 *
 * @author MetaYii
 */
class EUriValidator extends CValidator
{
   //***************************************************************************
   // Configurable parameters
   //***************************************************************************

   /**
    * Validation mode. One of:
    *
    * - uri: check if a URI is well formed, as far and strictly as possible.
    * - web: check if a URI is a well formed, real world WWW URI (http or https).
    * - mail: check if a URI is a well formed, real world mailto: URI.
    *
    * @var string
    */
   private $mode = 'uri';

   /**
    * Whether to allow empty URIs
    *
    * @var boolean
    */
   private $allowEmptyURI = true;

   /**
    * Should we validate the scheme against the IANA registered schemes?
    * Only consulted in 'uri' mode.
    *
    * @var boolean
    */
   private $validateIANA = true;

   /**
    * Should we validate if the port is between 0 and 65535?
    *
    * @var boolean
    */
   private $validatePortRange = true;

   /**
    * The user can define which schemes are valid,
    * for example array('http','ftp'). An empty array disables the check.
    * Only consulted in 'uri' mode.
    *
    * @var array of string
    */
   private $customSchemes = array();

   /**
    * The user can define which ports are valid,
    * for example array(80,21). An empty array disables the check.
    *
    * @var array of integer
    */
   private $customPorts = array();

   //***************************************************************************
   // Local properties
   //***************************************************************************

   /**
    * URI Schemes defined by IANA
    * @link http://www.iana.org/assignments/uri-schemes.html
    *
    * @var array
    */
   private $validIANASchemes = array(
      'aaa',
      'aaas',
      'acap',
      'cap',
      'cid',
      'crid',
      'data',
      'dav',
      'dict',
      'dns',
      'fax',
      'file',
      'ftp',
      'go',
      'gopher',
      'h323',
      'http',
      'https',
      'iax',
      'icap',
      'im',
      'imap',
      'info',
      'ipp',
      'iris',
      'iris.beep',
      'iris.xpc',
      'iris.xpcs',
      'iris.lwz',
      'ldap',
      'mailto',
      'mid',
      'modem',
      'msrp',
      'msrps',
      'mtqp',
      'mupdate',
      'news',
      'nfs',
      'nntp',
      'opaquelocktoken',
      'pop',
      'pres',
      'rtsp',
      'service',
      'shttp',
      'sip',
      'sips',
      'snmp',
      'soap.beep',
      'soap.beeps',
      'tag',
      'tel',
      'telnet',
      'tftp',
      'thismessage',
      'tip',
      'tv',
      'urn',
      'vemmi',
      'xmlrpc.beep',
      'xmlrpc.beeps',
      'xmpp',
      'z39.50r',
      'z39.50s',
      'afs',
      'dtn',
      'mailserver',
      'pack',
      'tn3270',
      'prospero',
      'snews',
      'videotex',
      'wais',
   );

   //***************************************************************************
   // Setters and getters
   //***************************************************************************

   /**
    * Sets the validation mode.
    *
    * @param string $value one of "uri", "web" or "mail"
    * @throws CException if $value is not one of the supported modes
    */
   public function setMode($value)
   {
      // Strict comparison: a loose in_array() would accept e.g. integer 0 on PHP < 8.
      if (!in_array($value, array('uri', 'web', 'mail'), true))
         throw new CException(Yii::t('EUriValidator', 'mode must be one of: "uri", "web", "mail"'));
      $this->mode = $value;
   }

   /**
    * Returns the validation mode.
    *
    * @return string
    */
   public function getMode()
   {
      return $this->mode;
   }

   /**
    * Sets whether empty URIs are accepted.
    *
    * @param boolean $value
    * @throws CException if $value is not a boolean
    */
   public function setAllowEmptyURI($value)
   {
      if (!is_bool($value))
         throw new CException(Yii::t('EUriValidator', 'value must be boolean'));
      $this->allowEmptyURI = $value;
   }

   /**
    * Returns whether empty URIs are accepted.
    *
    * @return boolean
    */
   public function getAllowEmptyURI()
   {
      return $this->allowEmptyURI;
   }

   /**
    * Sets whether the scheme must be one of the IANA registered schemes.
    *
    * @param boolean $value
    * @throws CException if $value is not a boolean
    */
   public function setValidateIANA($value)
   {
      if (!is_bool($value))
         throw new CException(Yii::t('EUriValidator', 'value must be boolean'));
      $this->validateIANA = $value;
   }

   /**
    * Returns whether the scheme must be one of the IANA registered schemes.
    *
    * @return boolean
    */
   public function getValidateIANA()
   {
      return $this->validateIANA;
   }

   /**
    * Sets whether the port must lie between 0 and 65535.
    *
    * @param boolean $value
    * @throws CException if $value is not a boolean
    */
   public function setValidatePortRange($value)
   {
      if (!is_bool($value))
         throw new CException(Yii::t('EUriValidator', 'value must be boolean'));
      $this->validatePortRange = $value;
   }

   /**
    * Returns whether the port must lie between 0 and 65535.
    *
    * @return boolean
    */
   public function getValidatePortRange()
   {
      return $this->validatePortRange;
   }

   /**
    * Sets the whitelist of accepted schemes (lower case strings).
    *
    * @param array $value
    * @throws CException if $value is not an array
    */
   public function setCustomSchemes($value)
   {
      if (!is_array($value))
         throw new CException(Yii::t('EUriValidator', 'value must be an array'));
      $this->customSchemes = $value;
   }

   /**
    * Returns the whitelist of accepted schemes.
    *
    * @return array
    */
   public function getCustomSchemes()
   {
      return $this->customSchemes;
   }

   /**
    * Sets the whitelist of accepted ports.
    *
    * @param array $value
    * @throws CException if $value is not an array
    */
   public function setCustomPorts($value)
   {
      if (!is_array($value))
         throw new CException(Yii::t('EUriValidator', 'value must be an array'));
      $this->customPorts = $value;
   }

   /**
    * Returns the whitelist of accepted ports.
    *
    * @return array
    */
   public function getCustomPorts()
   {
      return $this->customPorts;
   }

   //***************************************************************************
   // Private utilities
   //***************************************************************************

   /**
    * Tells whether the value counts as "no URI given".
    *
    * Plain empty() is not used on its own because it also reports the string
    * "0" as empty, which would let "0" pass as a valid URI whenever empty
    * URIs are allowed.
    *
    * @param mixed $uri the value to inspect
    * @return boolean
    */
   private function isEmptyUri($uri)
   {
      return empty($uri) && $uri !== '0';
   }

   /**
    * Check if it contains just the allowed characters and if the hexadecimal
    * chars are well formed.
    * @link http://tools.ietf.org/html/rfc3986#section-2
    *
    * @param string $uri the URI
    * @return boolean
    */
   private function checkChars($uri)
   {
      // Any character outside the RFC 3986 reserved/unreserved sets (plus "%").
      if (preg_match("/[^a-z0-9\?\#\[\]\@\!\&\'\(\)\*\+\,\;\=\.\-\_\~\%\:\/\$]/i", $uri)) {
         return false;
      }
      // Every "%" must introduce exactly two hexadecimal digits; this also
      // catches a lone "%" at the very end of the string.
      if (preg_match('/%(?![0-9a-f]{2})/i', $uri)) {
         return false;
      }
      return true;
   }

   /**
    * Check the port
    *
    * @param string $port the port part of the URI, empty string or null when
    * the URI carries no port
    * @return boolean
    */
   private function checkPort($port)
   {
      // No port given: nothing to check. A strict comparison is required here,
      // empty() would also skip the checks for the explicit port "0".
      if ($port === null || $port === '') {
         return true;
      }

      $port = intval($port);

      if ($this->validatePortRange && ($port < 0 || $port > 65535)) {
         return false;
      }

      // Loose comparison on purpose: the whitelist may be configured with
      // integers (80) or numeric strings ('80').
      if (!empty($this->customPorts) && !in_array($port, $this->customPorts)) {
         return false;
      }

      return true;
   }

   //***************************************************************************
   // Validation methods
   //***************************************************************************

   /**
    * Validates a URI
    *
    * @param string $uri the URI
    * @return boolean
    */
   protected function validateUri($uri)
   {
      if ($this->isEmptyUri($uri)) {
         return $this->allowEmptyURI;
      }

      if (!$this->checkChars($uri)) {
         return false;
      }

      /**
       * Parse the URI. Parts are kept in $parts in the following order:
       *
       * 1   = scheme
       * 2   = userinfo
       * 3   = host
       * 4   = port
       * 5,6 = path (5 if it has an authority, 6 if it doesn't)
       * 7   = query
       * 8   = fragment
       */
      if (!preg_match("/^([a-z0-9\+\.\-]+):(?:\/\/(?:((?:[a-z0-9\-\._\~!\$\&\'\(\)\*\+\,\;\=\:]|%[0-9A-F]{2})*)\@)?((?:[a-z0-9-\.\_\~\!\$\&\'\(\)\*\+\,\;\=]|%[0-9A-F]{2})*)(?::(\d*))?(\/(?:[a-z0-9\-\._\~\!\$\&\'\(\)\*\+\,\;\=\:\@\/]|%[0-9A-F]{2})*)?|(\/?(?:[a-z0-9\-\._\~\!\$\&\'\(\)\*\+\,\;\=\:\@]|%[0-9A-F]{2})+(?:[a-z0-9\-\._\~\!\$\&\'\(\)\*\+\,\;\=\:\@\/]|%[0-9A-F]{2})*)?)(?:\?((?:[a-z0-9\-\._\~\!\$\&\'\(\)\*\+\,\;\=\:\/\?\@]|%[0-9A-F]{2})*))?(?:\#((?:[a-z0-9\-\._\~\!\$\&\'\(\)\*\+\,\;\=\:\/\?\@]|%[0-9A-F]{2})*))?$/i", $uri, $parts)) {
         return false;
      }

      // preg_match() omits trailing groups that did not take part in the
      // match, so pad the result to keep every index below defined.
      $parts = array_pad($parts, 9, '');

      $scheme = strtolower($parts[1]);
      $userinfo = $parts[2];
      $host = $parts[3];
      $port = $parts[4];
      $authority = ($userinfo !== '' ? $userinfo.'@' : '').$host.($port !== '' ? ':'.$port : '');

      // RFC 3986, section 3.3
      if ($authority !== '') {
         // With an authority the path must be empty or begin with "/".
         $path = $parts[5];
         if ($path !== '' && !preg_match("!^/!", $path)) {
            return false;
         }
      }
      else {
         // Without an authority the path cannot begin with "//".
         $path = $parts[6];
         if (preg_match("!^//!", $path)) {
            return false;
         }
      }

      /**
       * Scheme must not be empty and must begin with a letter
       */
      if (!preg_match("!^[a-z][a-z0-9\+\-\.]*$!", $scheme)) {
         return false;
      }

      /**
       * Check if the scheme is one of the IANA-registered schemes
       */
      if ($this->validateIANA && !in_array($scheme, $this->validIANASchemes, true)) {
         return false;
      }

      /**
       * Check if the scheme is one of the custom allowed schemes
       */
      if (!empty($this->customSchemes) && !in_array($scheme, $this->customSchemes, true)) {
         return false;
      }

      return $this->checkPort($port);
   }

   /**
    * Validates a WWW URI (http or https schemes)
    *
    * @param string $uri the URI
    * @return boolean
    */
   protected function validateWWWUri($uri)
   {
      if ($this->isEmptyUri($uri)) {
         return $this->allowEmptyURI;
      }

      if (!$this->checkChars($uri)) {
         return false;
      }

      /**
       * Parse the URI. Parts are kept in $parts in the following order:
       *
       * 1   = scheme
       * 2   = userinfo
       * 3   = host
       * 4   = port
       * 5   = path
       * 6   = query
       * 7   = fragment
       *
       * We match only if scheme is "http" or "https"
       */
      if (!preg_match("/^(https?):\/\/(?:((?:[a-z0-9\-\._\~!\$\&\'\(\)\*\+\,\;\=\:]|%[0-9A-F]{2})*)\@)?((?:[a-z0-9\.\-]|%[0-9A-F]{2}){3,})(?::(\d+))?((?:\/(?:[a-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=\:\@]|%[0-9A-F]{2})*)*)(?:\?((?:[a-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=\:\/\?\@]|%[0-9A-F]{2})*))?(?:\#((?:[a-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=\:\/\?\@]|%[0-9A-F]{2})*))?$/i", $uri, $parts)) {
         return false;
      }

      // The pattern itself requires a host of at least three characters, so
      // an authority is guaranteed to exist at this point. Only the port needs
      // a further look. Group 5 (path) always takes part in a successful
      // match, hence index 4 is always defined (empty string when no port).
      return $this->checkPort($parts[4]);
   }

   /**
    * Validates a mailto: URI
    *
    * @param string $uri the URI
    * @return boolean
    */
   protected function validateMailtoUri($uri)
   {
      if ($this->isEmptyUri($uri)) {
         return $this->allowEmptyURI;
      }

      if (!$this->checkChars($uri)) {
         return false;
      }

      // "mailto:" followed by an optional addressee part and an optional query.
      if (!preg_match("/^(mailto):((?:[a-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=\:\@]|%[0-9A-F]{2})+)?(?:\?((?:[a-z0-9\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=\:\/\?\@]|%[0-9A-F]{2})*))?$/i", $uri)) {
         return false;
      }

      return true;
   }

   //***************************************************************************
   // Validator
   //***************************************************************************

   /**
    * Validates the attribute of the object.
    * If there is any error, the error message is added to the object.
    *
    * @param CModel $object the object being validated
    * @param string $attribute the attribute being validated
    * @throws CException if the configured mode is not supported
    */
   protected function validateAttribute($object, $attribute)
   {
      // A null or undefined attribute is handed over as null so that it is
      // judged by allowEmptyURI like any other empty value, instead of being
      // reported as invalid unconditionally.
      $value = (is_object($object) && isset($object->$attribute)) ? $object->$attribute : null;

      switch ($this->mode) {
         case 'uri':
            $valid = $this->validateUri($value);
            break;

         case 'web':
            $valid = $this->validateWWWUri($value);
            break;

         case 'mail':
            $valid = $this->validateMailtoUri($value);
            break;

         default:
            throw new CException(Yii::t('EUriValidator', 'Invalid mode'));
      }

      if (!$valid) {
         $message = $this->message !== null ? $this->message : Yii::t('EUriValidator', 'The URI is invalid');
         $this->addError($object, $attribute, $message);
      }
   }
}