Security Class

<?php

// ------------------------------------------------------------------------

/**
* Security Class
*
* @package Security
* @subpackage Libraries
* @category Security
*/
class Security {
/**
* Random Hash for protecting URLs
*
* @var string
* @access protected
*/
protected $_xss_hash = '';
/**
* List of never allowed strings
*
* @var array
* @access protected
*/
protected $_never_allowed_str = array(
'document.cookie' => '',
'document.write' => '',
'.parentNode' => '',
'.innerHTML' => '',
'-moz-binding' => '',
'' => '-->',
'<![CDATA[' => '<![CDATA[',
'<comment>' => '<comment>'
);

/**
* List of never allowed regex replacements
*
* @var array
*/
protected $_never_allowed_regex = array(
'javascript\s*:',
'(document|(document\.)?window)\.(location|on\w*)',
'expression\s*(\(|&\#40;)', // CSS and IE
'vbscript\s*:', // IE, surprise!
'wscript\s*:', // IE
'jscript\s*:', // IE
'vbs\s*:', // IE
'Redirect\s+30\d',
"([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
);

public $charset = 'UTF-8';

/**
* Constructor
*/
public function __construct(){}

// --------------------------------------------------------------------

/**
* XSS Clean
*
* Sanitizes data so that Cross Site Scripting Hacks can be
* prevented. This method does a fair amount of work but
* it is extremely thorough, designed to prevent even the
* most obscure XSS attempts. Nothing is ever 100% foolproof,
* of course, but I haven't been able to get anything passed
* the filter.
*
* Note: Should only be used to deal with data upon submission.
* It's not something that should be used for general
* runtime processing.
*
* @link http://channel.bitflux.ch/wiki/XSS_Prevention
* Based in part on some code and ideas from Bitflux.
*
* @link http://ha.ckers.org/xss.html
* To help develop this script I used this great list of
* vulnerabilities along with a few other hacks I've
* harvested from examining vulnerabilities in other programs.
*
* @param string|string[] $str Input data
* @param bool $is_image Whether the input is an image
* @return string
*/
public function xss_clean($str, $is_image = FALSE)
{
// Is the string an array?
if (is_array($str))
{
while (list($key) = each($str))
{
$str[$key] = $this->xss_clean($str[$key]);
}

return $str;
}

// Remove Invisible Characters
$str = $this->remove_invisible_characters($str);

/*
* URL Decode
*
* Just in case stuff like this is submitted:
*
* <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
*
* Note: Use rawurldecode() so it does not remove plus signs
*/
do
{
$str = rawurldecode($str);
}
while (preg_match('/%[0-9a-f]{2,}/i', $str));

/*
* Convert character entities to ASCII
*
* This permits our tests below to work reliably.
* We only convert entities that are within tags since
* these are the ones that will pose security problems.
*/
$str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
$str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

// Remove Invisible Characters Again!
$str = $this->remove_invisible_characters($str);

/*
* Convert all tabs to spaces
*
* This prevents strings like this: ja vascript
* NOTE: we deal with spaces between characters later.
* NOTE: preg_replace was found to be amazingly slow here on
* large blocks of data, so we use str_replace.
*/
$str = str_replace("\t", ' ', $str);

// Capture converted string for later comparison
$converted_string = $str;

// Remove Strings that are never allowed
$str = $this->_do_never_allowed($str);

/*
* Makes PHP tags safe
*
* Note: XML tags are inadvertently replaced too:
*
* <?xml
*
* But it doesn't seem to pose a problem.
*/
if ($is_image === TRUE)
{
// Images have a tendency to have the PHP short opening and
// closing tags every so often so we skip those and only
// do the long opening tags.
$str = preg_replace('/<\?(php)/i', '<?\\1', $str);
}
else
{
$str = str_replace(array('<?', '?'.'>'), array('<?', '?>'), $str);
}

/*
* Compact any exploded words
*
* This corrects words like: j a v a s c r i p t
* These words are compacted back to their correct state.
*/
$words = array(
'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
'vbs', 'script', 'base64', 'applet', 'alert', 'document',
'write', 'cookie', 'window', 'confirm', 'prompt'
);

foreach ($words as $word)
{
$word = implode('\s*', str_split($word)).'\s*';

// We only want to do this when it is followed bcompilations/kk_best_of_me_vol1_songspk_collection_mp3_songs.htmly a non-word character
// That way valid stuff like "dealer to" does not become "dealerto"
$str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
}

/*
* Remove disallowed Javascript in links or img tags
* We used to do some version comparisons and use of stripos(),
* but it is dog slow compared to these simplified non-capturing
* preg_match(), especially if the pattern exists in the string
*
* Note: It was reported that not only space characters, but all in
* the following pattern can be parsed as separators between a tag name
* and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
* ... however, remove_invisible_characters() above already strips the
* hex-encoded ones, so we'll skip them below.
*/
do
{
$original = $str;

if (preg_match('/<a/i', $str))
{
$str = preg_replace_callback('#<a[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
}

if (preg_match('/<img/i', $str))
{
$str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
}

if (preg_match('/script|xss/i', $str))
{
$str = preg_replace('#</*(?:script|xss).*?>#si', '', $str);
}
}
while ($original !== $str);

unset($original);

// Remove evil attributes such as style, onclick and xmlns
$str = $this->_remove_evil_attributes($str, $is_image);

/*
* Sanitize naughty HTML elements
*
* If a tag containing any of the words in the list
* below is found, the tag gets converted to entities.
*
* So this: <blink>
* Becomes: <blink>
*/
$naughty = 'alert|prompt|confirm|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|button|select|isindex|layer|link|meta|keygen|object|plaintext|style|script|textarea|title|math|video|svg|xml|xss';
$str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

/*
* Sanitize naughty scripting elements
*
* Similar to above, only instead of looking for
* tags it looks for PHP and JavaScript commands
* that are disallowed. Rather than removing the
* code, it simply converts the parenthesis to entities
* rendering the code un-executable.
*
* For example: eval('some code')
* Becomes: eval('some code')
*/
$str = preg_replace('#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)$(.*?)$#si',
'\\1\\2(\\3)',
$str);

// Final clean up
// This adds a bit of extra precaution in case
// something got through the above filters
$str = $this->_do_never_allowed($str);
$str = str_replace(array('<', '>'), array('<', '>'), $str);

/*
* Images are Handled in a Special Way
* - Essentially, we want to know that after all of the character
* conversion is done whether any unwanted, likely XSS, code was found.
* If not, we return TRUE, as the image is clean.
* However, if the string post-conversion does not matched the
* string post-removal of XSS, then it fails, as there was unwanted XSS
* code found and removed/changed during processing.
*/
if ($is_image === TRUE)
{
return ($str === $converted_string);
}

return $str;
}

// --------------------------------------------------------------------

/**
* Random Hash for protecting URLs
*
* @return string
*/
public function xss_hash()
{
if ($this->_xss_hash == '')
{
mt_srand();
$this->_xss_hash = md5(time() + mt_rand(0, 1999999999));
}

return $this->_xss_hash;
}

// --------------------------------------------------------------------

/**
* HTML Entities Decode
*
* This function is a replacement for html_entity_decode()
*
* The reason we are not using html_entity_decode() by itself is because
* while it is not technically correct to leave out the semicolon
* at the end of an entity most browsers will still interpret the entity
* correctly. html_entity_decode() does not convert entities without
* semicolons, so we are left with our own little solution here. Bummer.
*
* @param string
* @param string
* @return string
*/
public function entity_decode($str, $charset = NULL)
{
if (strpos($str, '&') === FALSE)
{
return $str;
}

static $_entities;

isset($charset) OR $charset = $this->charset;
$flag = ENT_COMPAT;

do
{
$str_compare = $str;

// Decode standard entities, avoiding false positives
if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
{
if ( ! isset($_entities))
{
$_entities = array_map(
'strtolower', get_html_translation_table(HTML_ENTITIES, $flag)
);

// If we're not on PHP 5.4+, add the possibly dangerous HTML 5
// entities to the array manually
if ($flag === ENT_COMPAT)
{
$_entities[':'] = '&colon;';
$_entities['('] = '(';
$_entities[')'] = ')';
$_entities["\n"] = '&newline;';
$_entities["\t"] = '&tab;';
}
}

$replace = array();
$matches = array_unique(array_map('strtolower', $matches[0]));
foreach ($matches as &$match)
{
if (($char = array_search($match.';', $_entities, TRUE)) !== FALSE)
{
$replace[$match] = $char;
}
}

$str = str_ireplace(array_keys($replace), array_values($replace), $str);
}

// Decode numeric & UTF16 two byte entities
$str = html_entity_decode(
preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
$flag,
$charset
);
}
while ($str_compare !== $str);
return $str;
}

// --------------------------------------------------------------------

/**
* Filename Security
*
* @param string
* @param bool
* @return string
*/
public function sanitize_filename($str, $relative_path = FALSE)
{
$bad = array(
"../",
"",
"<",
">",
"'",
'"',
'&',
'$',
'#',
'{',
'}',
'[',
']',
'=',
';',
'?',
"%20",
"%22",
"%3c", // <
"%253c", // <
"%3e", // >
"%0e", // >
"%28", // (
"%29", // )
"%2528", // (
"%26", // &
"%24", // $
"%3f", // ?
"%3b", // ;
"%3d" // =
);

if ( ! $relative_path)
{
$bad[] = './';
$bad[] = '/';
}

$str = $this->remove_invisible_characters($str, FALSE);
return stripslashes(str_replace($bad, '', $str));
}

// ----------------------------------------------------------------

/**
* Compact Exploded Words
*
* Callback function for xss_clean() to remove whitespace from
* things like j a v a s c r i p t
*
* @param type
* @return type
*/
protected function _compact_exploded_words($matches)
{
return preg_replace('/\s+/s', '', $matches[1]).$matches[2];
}

// --------------------------------------------------------------------

/*
* Remove Evil HTML Attributes (like evenhandlers and style)
*
* It removes the evil attribute and either:
* - Everything up until a space
* For example, everything between the pipes:
* <a |style=document.write('hello');alert('world');| class=link>
* - Everythingerror_reporting(E_ALL);
ini_set('display_errors','on'); inside the quotes
* For example, everything between the pipes:
* <a |style="document.write('hello'); alert('world');"| class="link">
*
* @param string $str The string to check
* @param boolean $is_image TRUE if this is an image
* @return string The string with the evil attributes removed
*/
protected function _remove_evil_attributes($str, $is_image)
{
$evil_attributes = array('on\w*', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime');

if ($is_image === TRUE)
{
/*
* Adobe Photoshop puts XML metadata into JFIF images,
* including namespacing, so we have to allow this for images.
*/
unset($evil_attributes[array_search('xmlns', $evil_attributes)]);
}

do {
$count = $temp_count = 0;

// replace occurrences of illegal attribute strings with quotes (042 and 047 are octal quotes)
$str = preg_replace('/(<[^>]+)(?<!\w)('.implode('|', $evil_attributes).')\s*=\s*(\042|\047)([^\\2]*?)(\\2)/is', '$1', $str, -1, $temp_count);
$count += $temp_count;

// find occurrences of illegal attribute strings without quotes
$str = preg_replace('/(<[^>]+)(?<!\w)('.implode('|', $evil_attributes).')\s*=\s*([^\s>]*)/is', '$1', $str, -1, $temp_count);
$count += $temp_count;
}
while ($count);

$jsMList = Array('onabort', 'onactivate', 'onafterprint', 'onafterupdate', 'onbeforeactivate', 'onbeforecopy', 'onbeforecut', 'onbeforedeactivate', 'onbeforeeditfocus', 'onbeforepaste', 'onbeforeprint', 'onbeforeunload', 'onbeforeupdate', 'onblur', 'onbounce', 'oncellchange', 'onchange', 'onclick', 'oncontextmenu', 'oncontrolselect', 'oncopy', 'oncut', 'ondataavailable', 'ondatasetchanged', 'ondatasetcomplete', 'ondblclick', 'ondeactivate', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'onerror', 'onerrorupdate', 'onfilterchange', 'onfinish', 'onfocus', 'onfocusin', 'onfocusout', 'onhelp', 'onkeydown', 'onkeypress', 'onkeyup', 'onlayoutcomplete', 'onload', 'onlosecapture', 'onmousedown', 'onmouseenter', 'onmouseleave', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onmove', 'onmoveend', 'onmovestart', 'onpaste', 'onpropertychange', 'onreadystatechange', 'onreset', 'onresize', 'onresizeend', 'onresizestart', 'onrowenter', 'onrowexit', 'onrowsdelete', 'onrowsinserted', 'onscroll', 'onselect', 'onselectionchange', 'onselectstart', 'onstart', 'onstop', 'onsubmit', 'onunload', 'onevent');

do {
$count = $temp_count = 0;
// replace occurrences of illegal
$str = preg_replace('/(?<!\w)('.implode('|', $jsMList).')\s*=\s*([^\s>]*)/is', '', $str, -1, $temp_count);
$count += $temp_count;
}
while ($count);

return $str;
}

// --------------------------------------------------------------------

/**
* Sanitize Naughty HTML
*
* Callback function for xss_clean() to remove naughty HTML elements
*
* @param array
* @return string
*/
protected function _sanitize_naughty_html($matches)
{
return '<'.$matches[1].$matches[2].$matches[3] // encode opening brace
// encode captured opening or closing brace to prevent recursive vectors:
.str_replace(array('>', '<'), array('>', '<'), $matches[4]);
}

// --------------------------------------------------------------------

/**
* JS Link Removal
*
* Callback function for xss_clean() to sanitize links
* This limits the PCRE backtracks, making it more performance friendly
* and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
* PHP 5.2+ on link-heavy strings
*
* @param array
* @return string
*/
protected function _js_link_removal($match)
{
return str_replace($match[1],
preg_replace('#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|data\s*:)#si',
'',
$this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]))
),
$match[0]);
}
// --------------------------------------------------------------------

/**
* JS Image Removal
*
* Callback function for xss_clean() to sanitize image tags
* This limits the PCRE backtracks, making it more performance friendly
* and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
* PHP 5.2+ on image tag heavy strings
*
* @param array
* @return string
*/
protected function _js_img_removal($match)
{
return str_replace($match[1],
preg_replace('#src=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;)|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si',
'',
$this->_filter_attributes(str_replace(array('<', '>'), '', $match[1]))
),
$match[0]);
}

// --------------------------------------------------------------------

/**
* Attribute Conversion
*
* Used as a callback for XSS Clean
*
* @param array
* @return string
*/
protected function _convert_attribute($match)
{
return str_replace(array('>', '<', '\\'), array('>', '<', '\\\\'), $match[0]);
}

// --------------------------------------------------------------------

/**
* Filter Attributes
*
* Filters tag attributes for consistency and safety
*
* @param string
* @return string
*/
protected function _filter_attributes($str)
{
$out = '';
if (preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $matches))
{
foreach ($matches[0] as $match)
{
$out .= preg_replace('#/\*.*?\*/#s', '', $match);
}
}

return $out;
}
// --------------------------------------------------------------------

/**
* HTML Entity Decode Callback
*
* Used as a callback for XSS Clean
*
* @param array
* @return string
*/
protected function _decode_entity($match)
{
// Protect GET variables in URLs
// 901119URL5918AMP18930PROTECT8198
$match = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i', $this->xss_hash().'\\1=\\2', $match[0]);

// Decode, then un-protect URL GET vars
return str_replace(
$this->xss_hash(),
'&',
$this->entity_decode($match, $this->charset)
);
}
// --------------------------------------------------------------------

/**
* Do Never Allowed
*
* @used-by CI_Security::xss_clean()
* @param string
* @return string
*/
protected function _do_never_allowed($str)
{
$str = str_replace(array_keys($this->_never_allowed_str), $this->_never_allowed_str, $str);

foreach ($this->_never_allowed_regex as $regex)
{
$str = preg_replace('#'.$regex.'#is', '', $str);
}

return $str;
}

// --------------------------------------------------------------------

/**
* Remove Invisible Characters
*
* This prevents sandwiching null characters
* between ascii characters, like Java\0script.
*
* @access public
* @param string
* @return string
*/
function remove_invisible_characters($str, $url_encoded = TRUE)
{
$non_displayables = array();

// every control character except newline (dec 10)
// carriage return (dec 13), and horizontal tab (dec 09)

if ($url_encoded)
{
$non_displayables[] = '/%0[0-8bcef]/'; // url encoded 00-08, 11, 12, 14, 15
$non_displayables[] = '/%1[0-9a-f]/'; // url encoded 16-31
}

$non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

do
{
$str = preg_replace($non_displayables, '', $str, -1, $count);
}
while ($count);

return $str;
}

/**
* Strips extra whitespace, images, scripts and stylesheets from output
*
* @param string $str String to sanitize
* @return string sanitized string
*/
function stripAll($str) {
/*
* Is the string an array?
*
*/
if (is_array($str))
{
while (list($key) = each($str))
{
$str[$key] = $this->stripAll($str[$key]);
}
return $str;
}

/*
* Check string not empty
*/
if (empty($str))
{
return $str;
}

//Strips scripts and stylesheets from output
//string String with <script>, <style>, <link>, <img> elements removed.
$str = preg_replace('/(<link[^>]+rel="[^"]*stylesheet"[^>]*>|<img[^>]*>|style="[^"]*")|<script[^>]*>.*?<\/script>|<style[^>]*>.*?<\/style>|/is', '', $str);

//Strip HTML and PHP tags from a string
// Allow 'br', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd'
$str = strip_tags($str, '<br><em><strong><cite><blockquote><code><ul><ol><li><dl><dt><dd>');

//Strips extra whitespace from output
$str = preg_replace('/[\n\r\t]+/', '', $str);
$str = preg_replace('/\s{2,}/u', ' ', $str);;

return $str;
}

}

// END Security Class

/* End of file Class.Security.php */

Coding Style - PHP Code

Friday, August 7, 2015

Security Class

No comments:

Post a Comment

Blog Archive