<?php
/*
  * MarkHtml v1.0
  * =============
  *
  * Cleans HTML code for the site of malicious code.
  * Neutralizes known XSS vectors and prohibits the use of styles that can
  * damage the appearance of the site.
  *
  * The code is based on [html_filter] (http://savvateev.org/blog/36/)
  * @ License: LGPL
  * @ Date: 2011/04/05
  * @ Author: Vladimir Romanovich <ibnteo@gmail.com>
*/


/**
 * Convenience wrapper: filters an HTML fragment with MarkHtml and returns the result.
 *
 * @param string $text  HTML fragment to filter.
 * @param bool   $xhtml Passed through to MarkHtml; when true the output uses XHTML
 *                      conventions (see MarkHtml::filter()).
 * @param array  $tags  Passed through to MarkHtml as its list of forbidden tag names.
 *                      Defaults to an empty list. The default exists because a required
 *                      parameter after an optional one is deprecated since PHP 8.0;
 *                      callers that already pass three arguments are unaffected.
 *
 * @return string The filtered markup.
 */
function markhtml($text, $xhtml = false, $tags = array()) {
    $markhtml = new MarkHtml($text, $xhtml, $tags);
    $markhtml->filter();
    return $markhtml->text;
}

/**
 * Blacklist-based HTML filter.
 *
 * The input is split into text and tag segments. Forbidden tags are rendered as
 * escaped text, forbidden attributes are dropped, stray closing tags are removed
 * and unclosed tags are closed. Plain text between tags is passed through unchanged.
 *
 * Usage: construct with the markup, call filter(), then read $text.
 */
class MarkHtml {

    /**
     * Lower-case names of forbidden tags; these are shown as escaped text.
     * The special entry 'tags_attribute' additionally enables the attribute filter.
     */
    public $tags = array();
    /** Names of void ("single") tags that never get a closing tag. */
    public $tags_closed = array();
    /**
     * Attribute blacklist: regex on the attribute name => regex on the (entity-decoded)
     * value. An empty value pattern forbids the attribute regardless of its value.
     */
    public $tags_attr = array();
    /** When true, attributes are always written as key="value" and void tags end in " /". */
    public $xhtml = false;
    /** Input markup before filter(), filtered markup after it. */
    public $text = '';

    /**
     * @param string $text  Markup to filter.
     * @param bool   $xhtml Emit XHTML-style output.
     * @param array  $tags  Forbidden tag names (lower case). Include 'tags_attribute'
     *                      to enable the built-in attribute blacklist.
     */
    public function __construct($text = '', $xhtml = false, $tags = array()) {
        $this->text = $text;
        $this->xhtml = $xhtml;
        // Tolerate null or a single name instead of failing inside in_array().
        $this->tags = (array) $tags;
        // Single tags
        $this->tags_closed = array('img', 'br', 'hr', 'param', 'input', 'area', 'col', 'isindex');

        // Illegal attributes
        if (in_array('tags_attribute', $this->tags, true)) {
            $this->tags_attr = array(
                'style' => '\\(|\\\\|position:|margin',
                '.*' => 's\s*c\s*r\s*i\s*p\s*t\s*:',
                '^on' => '',
                'src' => 'm\s*h\s*t\s*m\s*l\s*:',
                'type' => 'scriptlet',
                'allowscriptaccess' => 'always|samedomain'
            );
        }
    }

    /**
     * Legacy PHP 4-style constructor name, kept so existing explicit calls
     * (for example parent::MarkHtml(...)) keep working. PHP 8 no longer treats a
     * method named after the class as a constructor, so construction itself is
     * handled by __construct().
     *
     * @deprecated Use the constructor.
     */
    public function MarkHtml($text = '', $xhtml = false, $tags = array()) {
        $this->__construct($text, $xhtml, $tags);
    }

    /**
     * Filters $this->text in place.
     *
     * If the input cannot be parsed (the UTF-8 aware regex fails, e.g. on invalid
     * UTF-8), every '<' and '>' is escaped so that no markup is passed through
     * unfiltered.
     */
    public function filter() {
        $seg = $this->splitSegments((string) $this->text);
        if ($seg === null) {
            $this->text = str_replace(array('<', '>'), array('&lt;', '&gt;'), (string) $this->text);
            return;
        }

        // Names (with namespace prefix) of currently open non-void tags.
        $open_tags_stack = array();

        // count() is re-evaluated on purpose: closing tags may be spliced into $seg below.
        for ($i = 0; $i < count($seg); $i++) {
            if ($seg[$i]['seg_type'] !== 'tag') {
                continue;
            }

            // Type of tag: opening / closing, namespace, name, raw attribute string
            if (!preg_match('#^<\s*(/)?\s*([a-z]+:)?([a-z0-9]+)(.*?)>$#siu', $seg[$i]['value'], $matches)) {
                // Not a recognisable tag (comment, doctype, ...): emit it like text.
                $seg[$i]['seg_type'] = 'text';
                continue;
            }

            $seg[$i]['tag_type'] = ($matches[1] !== '') ? 'close' : 'open';
            $seg[$i]['tag_ns'] = $matches[2];
            $seg[$i]['tag_name'] = $matches[3];
            $seg[$i]['tag_name_lc'] = strtolower($matches[3]);

            $full_name = $seg[$i]['tag_ns'].$seg[$i]['tag_name'];
            $is_forbidden = in_array($seg[$i]['tag_name_lc'], $this->tags, true);
            $is_single = in_array($seg[$i]['tag_name_lc'], $this->tags_closed, true);

            if ($seg[$i]['tag_type'] === 'open') {
                if ($is_forbidden) {
                    // Forbidden tag: shown as escaped text, never put on the stack.
                    $seg[$i]['action'] = 'show';
                } elseif (!$is_single) {
                    array_push($open_tags_stack, $full_name);
                }
                // Attributes of a tag that is shown as text are kept unfiltered.
                $seg[$i]['attr'] = $this->parseAttributes($matches[4], !$is_forbidden);
                continue;
            }

            // Closing tag
            if ($is_single) {
                // Void tags have no closing tag, forbidden or not.
                $seg[$i]['action'] = 'del';
            } elseif ($is_forbidden) {
                $seg[$i]['action'] = 'show';
            } elseif (count($open_tags_stack) == 0) {
                // Nothing is open: drop the stray closing tag.
                $seg[$i]['action'] = 'del';
            } else {
                $tn = array_pop($open_tags_stack);
                if ($full_name != $tn) {
                    // Mismatch: close the innermost open tag first. The current closing
                    // tag moves to $i + 1 and is examined again on the next iteration.
                    array_splice($seg, $i, 0, array(array('seg_type'=>'tag', 'tag_type'=>'close', 'tag_name'=>$tn, 'action'=>'add')));
                }
            }
        }

        // Close the remaining stack of tags
        foreach (array_reverse($open_tags_stack) as $value) {
            array_push($seg, array('seg_type'=>'tag', 'tag_type'=>'close', 'tag_name'=>$value, 'action'=>'add'));
        }

        $this->text = $this->render($seg);
    }

    /**
     * Splits markup into an ordered list of text and tag segments.
     *
     * @param string $text
     * @return array|null List of array('seg_type' => 'text'|'tag', 'value' => string),
     *                    or null when the regex engine rejects the input.
     */
    private function splitSegments($text) {
        $pieces = preg_split('/(<[^<>]+>)/siu', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            return null;
        }
        $seg = array();
        foreach ($pieces as $idx => $piece) {
            if ($idx % 2 === 1) {
                // With PREG_SPLIT_DELIM_CAPTURE the odd entries are the matched tags.
                $seg[] = array('seg_type'=>'tag', 'value'=>$piece);
            } elseif ($piece !== '') {
                $seg[] = array('seg_type'=>'text', 'value'=>$piece);
            }
        }
        return $seg;
    }

    /**
     * Parses the raw attribute string of an opening tag.
     *
     * @param string $attr_string Everything between the tag name and the closing '>'.
     * @param bool   $apply_filter Whether attributes matching $tags_attr are dropped.
     * @return array Map of attribute name (with namespace prefix) => raw value.
     */
    private function parseAttributes($attr_string, $apply_filter) {
        preg_match_all('#([a-z]+:)?([a-z]+)(\s*=\s*[\"]\s*(.*?)\s*[\"])?(\s*=\s*[\']\s*(.*?)\s*[\'])?(=([^\s>]*))?#siu', $attr_string, $attr_m, PREG_SET_ORDER);
        $attr = array();
        foreach ($attr_m as $arr) {
            $attr_ns = $arr[1];
            $attr_key = $arr[2];
            // Trailing unmatched groups are omitted, so the last entry is the value of
            // whichever quoting style matched (or the name itself for a bare attribute).
            $attr_val = $arr[count($arr)-1];
            if ($apply_filter and $this->isForbiddenAttribute($attr_key, $attr_val)) {
                continue;
            }
            $attr[$attr_ns.$attr_key] = $attr_val;
        }
        return $attr;
    }

    /**
     * Checks one attribute against the $tags_attr blacklist.
     *
     * @param string $attr_key Attribute name without namespace prefix.
     * @param string $attr_val Raw attribute value.
     * @return bool True when the attribute must be dropped.
     */
    private function isForbiddenAttribute($attr_key, $attr_val) {
        // Entities are decoded first so that encoded payloads are matched too.
        $decoded = html_entity_decode($attr_val, ENT_QUOTES, 'UTF-8');
        foreach ($this->tags_attr as $key => $val) {
            if (!preg_match('/'.$key.'/ui', $attr_key)) {
                continue;
            }
            if ($val == '' or preg_match('/'.$val.'/ui', $decoded)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Serialises processed segments back into markup.
     *
     * @param array $seg Segments as produced by filter().
     * @return string
     */
    private function render($seg) {
        $out = '';
        foreach ($seg as $segment) {
            if ($segment['seg_type'] == 'text') {
                $out .= $segment['value'];
                continue;
            }
            $action = isset($segment['action']) ? $segment['action'] : '';
            if ($segment['seg_type'] != 'tag' or $action == 'del') {
                continue;
            }

            // A forbidden tag is shown as text, everything else is real markup.
            if ($action == 'show') {
                $st = '&lt;';
                $et = '&gt;';
            } else {
                $st = '<';
                $et = '>';
            }

            if ($segment['tag_type'] == 'open') {
                $out .= $st.$segment['tag_ns'].$segment['tag_name'];
                if (isset($segment['attr']) and is_array($segment['attr'])) {
                    foreach ($segment['attr'] as $attr_key=>$attr_val) {
                        $out .= ' '.$attr_key;
                        // A bare attribute (value equals name) is written without a value
                        // unless XHTML output is requested.
                        if ($this->xhtml or $attr_key != $attr_val) {
                            // The value is always re-quoted with double quotes, so a literal
                            // double quote (possible in single-quoted or unquoted input) must
                            // be escaped or it would terminate the attribute and allow
                            // injecting further attributes.
                            $out .= '="'.str_replace('"', '&quot;', $attr_val).'"';
                        }
                    }
                }
                // Close the single tag; compared in lower case so <BR> is treated like <br>.
                if ($this->xhtml and in_array($segment['tag_name_lc'], $this->tags_closed, true)) {
                    $out .= " /";
                }
                $out .= $et;
            } elseif ($segment['tag_type'] == 'close') {
                // Auto-inserted closing tags carry the namespace inside tag_name and have no tag_ns.
                $out .= $st.'/'.(isset($segment['tag_ns'])?$segment['tag_ns']:'').$segment['tag_name'].$et;
            }
        }
        return $out;
    }
}
