#native_company# #native_desc#
#native_cta#

Web Mirror

By Ed Martin
on September 12, 2004

Version: 1.1

Type: Full Script

Category: Other

License: GNU General Public License

Description: This script lets you view any website from anywhere, except that all of the data comes from your server's IP address rather than from the site you are browsing. For example, if ebay.com is blocked by a filter but you have a site that is not blocked by that filter, you can put this script on your server and browse ebay.com from behind the filter.

<?php
/*--This is my web_mirror script. It will take any
*--web page on the web and display it as if it was
*--coming from this page, so if you enter
*--http://www.google.com, you will see google in your browser but
*--it will show as coming from this page (if your
*--web site is www.blah.com it will appear to any filters or proxies
*--that you are browsing on www.blah.com, not google [ip and url]).
*--This is much like a proxy except there is no caching and
*--no configuration needed; all you need is a browser and a webserver
*--with php and curl compiled into php
*--
*--Note: this relies heavily on the Client URL Library Functions
*--(cURL), which must be compiled into php; you will get errors
*--saying that there is an undefined function unless cURL
*--is compiled into php
*--
*--Note again: if images don't load but the pages do, then
*--make sure there is nothing before or after the
*--php opening and closing tags, not even white space
*--
*--Author: Ed Martin (edman007x [at] mac [dot] com)
*--     Website: http://edman007.teamexe.com/
*--
*--Version 1.1
*-- -- Fixed up the cookie code a little
*-- -- added an option to remove certain tags
*--
*--Version 1.0
*-- -- Added Support for the css import function
*-- -- Changed all the preg_match_all's to preg_replace_callback
*-- -- Removed the 3 stars thing for the matches and replaced them with
*--    regex the anchors ^ and $
*-- -- Added support for stop urls, to allow you to specify exact phrases
*--    that should never be converted to a url pointing to this page
*-- -- Added an automatic way of checking for cURL
*--
*--Version 0.4 Beta
*-- -- Added Support for cookies, the cookies are stored in a session
*--    so they will get deleted after you leave the page for a while
*--    (you can't keep cookies for days)
*-- -- It now uses callbacks with cURL so non-html
*--    is sent to the browser while it is being downloaded by the server
*--    This means images and other files should load faster
*-- -- Fixed bug that caused incorrect url parsing in url with a supported
*--    protocol in it (eg. index.php?redirect=http://example.com/)
*--
*--Version 0.3 Beta
*-- -- Added support for forms
*-- -- changed the tag parsing, it won't allow < or > in ANY
*--    attribute, it allows it to parse the <input src= tag but
*--    the < and > in an attribute are valid html (not xhtml)
*-- -- Fixed the function that removed the headers, 'content-location' header
*--    tripped it up
*-- -- The urls are properly encoded in the html
*-- -- Using iframes the bar to enter a url stays
*--
*--Version 0.2 Beta
*-- -- Fixed a bug with regex for changing the url's
*--    (there are still a few problems)
*--
*--
*--Version: 0.1 Beta 
*-- -- original release
*--
*--
*--
*--License---- (http://www.gnu.org/licenses/gpl.txt)
*--
*--
*--This program is free software; you can redistribute it and/or modify
*--it under the terms of the GNU General Public License as published by
*--the Free Software Foundation; either version 2 of the License, or
*--(at your option) any later version.
*--
*--This program is distributed in the hope that it will be useful,
*--but WITHOUT ANY WARRANTY; without even the implied warranty of
*--MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*--GNU General Public License for more details.
*--
*--You should have received a copy of the GNU General Public License
*--along with this program; if not, write to the Free Software
*--Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
*--USA
*--
*/


//---config--

//credentials a visitor must supply (HTTP Basic auth) before this page
//will fetch anything; change both before putting the script online
$username = 'username';
$password = 'xxx';

//absolute url of this script; every rewritten link points back at it
$this_page = 'http://example.com/web_mirror/web_mirror.php';

//name of the request variable that carries the target url; keep it
//unusual so it cannot collide with a field of a mirrored form
$url_var_name = 'example_url';

//tags whose url-carrying attribute gets rewritten
//$tags[$i] is paired with $attribute[$i], so both lists must keep the
//same length and order
//do NOT add the <form> tag here, it is handled separately
$tags = array(
	'a',
	'area',
	'base',
	'bgsound',
	'embed',
	'frame',
	'iframe',
	'img',
	'link',
	'param',
	'script',
	'input',
);
$attribute = array(
	'href',  //<a>
	'href',  //<area>
	'href',  //<base>
	'src',   //<bgsound>
	'src',   //<embed>
	'src',   //<frame>
	'src',   //<iframe>
	'src',   //<img>
	'href',  //<link>
	'value', //<param>
	'src',   //<script>
	'src',   //<input>
);

//attribute values that must never be treated as a url, keyed by tag name
//use this for tags such as <param> whose attribute does not always hold
//a url; every value listed here must be lowercase
$stop_url = array(
	'param' => array(
		'high',
		'low',
		'best',
		'autolow',
		'autohigh',
		'true',
		'false',
		'video/quicktime',
		'myself',
	),
);

//tags to strip from the page completely, keyed by tag name
//the value says whether the tag stands alone (1) or has a closing tag (0)
//  'meta'   => 1  stops refreshes that would go to the wrong page
//  'script' => 0  removes javascript that does not work through this script
//  'img'    => 1  can speed up page loads
$remove_tags = array('meta' => 1);

//-------Do Not Edit below this line without Knowledge of PHP-------

//the whole script is built on cURL, so stop right away when it is missing
if (!function_exists('curl_init')){
    die('You Do not have cURL compiled into PHP. '
        .'This script requires cURL. '
        .'<a href="http://php.net/manual/en/ref.curl.php">Click Here For More Info</a>'
    );
}

//get the url of the page we are retrieving; it arrives in the request
//variable named by $url_var_name, in the body for POST and in the query
//string otherwise. Anything that is not a plain string (for example
//"?example_url[]=x") is ignored so later string handling cannot break
$request_vars = ($_SERVER['REQUEST_METHOD'] == 'POST') ? $_POST : $_GET;
if (isset($request_vars[$url_var_name]) && is_string($request_vars[$url_var_name])){
	$url_name = $request_vars[$url_var_name];
}
unset($request_vars);

//HTTP Basic authentication: missing or wrong credentials both get a 401
//the comparison is strict (!==) because a loose != treats numeric-looking
//strings such as '1e3' and '1000' as equal on older PHP versions
$auth_user = isset($_SERVER['PHP_AUTH_USER']) ? $_SERVER['PHP_AUTH_USER'] : NULL;
$auth_pass = isset($_SERVER['PHP_AUTH_PW']) ? $_SERVER['PHP_AUTH_PW'] : NULL;
if ($auth_user !== $username || $auth_pass !== $password){
	header('WWW-Authenticate: Basic realm="Web Mirror"');
	header('HTTP/1.0 401 Unauthorized');
	echo '401 ERROR';
	exit();
}
unset($auth_user, $auth_pass);

//no url variable at all: show the start page, which is a small form
//plus the iframe (named "page") that the mirrored pages are loaded into
if (!isset($url_name)){
	$start_page = <<<EOF
		<html><head><title>Web Mirror Script</title></head><body>
		<form action="$this_page" target="page">
			<center>URL:<input type="text" name="$url_var_name" size="50" value="http://" /><br />
			<input type="submit" value="Go" /></center>
		</form>
		<iframe width="100%" height="90%" src="$this_page?$url_var_name=" name="page" align="center" />
		</body></html>
EOF;
	echo $start_page;
	exit();
}

//url variable present but empty: this is what the iframe of the start
//page requests first, so answer with a simple welcome page
if (empty($url_name)){
	$welcome_page = <<<EOF
	<html><head><title>Web Mirror Script</title></head><body>
	<center><h1>Welcome!</h1></center>
	</body></html>
EOF;
	echo $welcome_page;
	exit();
}

//the cookies of the mirrored sites are kept in the session, and the
//session itself has to be tracked with a cookie
ini_set('session.use_cookies', '1');
session_start();

//make sure the cookie array is there
if (!isset($_SESSION['cookies']) || !is_array($_SESSION['cookies'])){
	$_SESSION['cookies'] = array();
}

//make sure it is a good url: anything that does not start with a
//supported scheme is assumed to be a bare host/path and gets http://
if (preg_match('/^(http|https|ftp):\/\//i', $url_name)){
	$url = $url_name;
} else {
	$url = 'http://'.$url_name;
}

//globals filled in by set_location() and read by the url rewriting code
$url_parts = array();
$path_parts = array();
set_location($url);

//collect every request field except the one that carries the target url
//http_build_query() is used instead of urlencode() on each value so that
//array fields (name[]=a&name[]=b) are forwarded instead of causing an error
$is_post = ($_SERVER['REQUEST_METHOD'] == 'POST');
$forwarded_fields = $is_post ? $_POST : $_GET;
unset($forwarded_fields[$url_var_name]);
$extra_data = http_build_query($forwarded_fields, '', '&');

//for POST $extra_data becomes the request body later on; for everything
//else it is appended to the query string of the target url
if (!$is_post && $extra_data !== ''){
	if (strpos($url, '?') === false){
		$url .= '?'.$extra_data;
	} else {
		//the url already has a query string: join with & unless it
		//already ends in a separator
		$last_char = substr($url, -1);
		$url .= (($last_char == '?' || $last_char == '&') ? '' : '&').$extra_data;
	}
}
unset($forwarded_fields);
//drop expired cookies, then build the Cookie header for this request
clean_cookies();
$cookie_data = read_cookies();

//state shared with the cURL callbacks read_header() and read_data()
$content_type_header = '';
$location = '';
$page = '';
$data_started = FALSE;

//Start cURL to get the page
$curl_handle = curl_init($url);

//set options: headers and body are both delivered through callbacks
curl_setopt($curl_handle, CURLOPT_HEADER, 0);
curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl_handle, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($curl_handle, CURLOPT_HEADERFUNCTION, 'read_header');
curl_setopt($curl_handle, CURLOPT_WRITEFUNCTION, 'read_data');

//send the browser name; it is the visitor's own user agent when the
//browser sent one (a fixed string such as an MSIE or googlebot user
//agent can be put here instead)
if (isset($_SERVER['HTTP_USER_AGENT'])){
	curl_setopt($curl_handle, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
}

//add any post data if needed
if ($_SERVER['REQUEST_METHOD'] == 'POST'){
	curl_setopt($curl_handle, CURLOPT_POST, 1);
	curl_setopt($curl_handle, CURLOPT_POSTFIELDS, $extra_data);
}

//send cookies if needed
if ($cookie_data != ''){
	curl_setopt($curl_handle, CURLOPT_COOKIE, $cookie_data);
}

//retrieve the page, keeping the error text in case the transfer fails
$fetch_result = curl_exec($curl_handle);
$fetch_error = ($fetch_result === false) ? curl_error($curl_handle) : '';
curl_close($curl_handle);

//a failed transfer that delivered nothing used to end in a blank page;
//tell the visitor what went wrong instead
if ($fetch_result === false && $page == '' && !$data_started){
	if (!headers_sent()){
		header('HTTP/1.0 502 Bad Gateway');
	}
	die('Unable to retrieve the requested page: '.htmlspecialchars($fetch_error));
}

//don't go any further if $page is empty, the read_data function did everything
if ($page == ''){
	die();
}

//update the page url if there was a location header, so that relative
//links are resolved against the page that was finally delivered
if ($location != ''){
	if (!preg_match('/^[a-z][a-z0-9+.\-]*:\/\//i', $location)){
		//relative redirect target: resolve it against the requested url
		//(an approximation when several redirects were chained)
		$redirect_base = $url_parts['scheme'].'://'.$url_parts['host'];
		if (strncmp($location, '/', 1) === 0){
			$location = $redirect_base.$location;
		} else {
			$location = $redirect_base.$path_parts['dirname'].'/'.$location;
		}
	}
	set_location($location);
}

//remove all tags in the $remove_tags array
//the key is the tag name; a true value means the tag stands alone, a
//false value means everything up to its closing tag is removed as well
foreach ($remove_tags as $tag => $single_tag){
	$tag_rx = preg_quote($tag, '/');
	if ($single_tag){
		$stripped = preg_replace('/<'.$tag_rx.'\b[^>]*>/is', '', $page);
	} else {
		$stripped = preg_replace('/<'.$tag_rx.'\b[^>]*>.*?<\/'.$tag_rx.'\s*>/is', '', $page);
	}
	//preg_replace() returns NULL on a PCRE error; keep the page in that case
	if ($stripped !== NULL){
		$page = $stripped;
	}
}

if (stristr($content_type_header,'text/html')!==false){
	//pass the upstream content type on so its charset is not lost
	//trim() removes the line ending, which header() would reject
	$upstream_type = trim($content_type_header);
	if ($upstream_type != '' && !headers_sent()){
		header($upstream_type);
	}
	//cycle through all tags then the <form tag if it is html
	$tag_count = min(count($tags), count($attribute));
	for ($i = 0; $i < $tag_count; $i++){
		$page = edit_tags($tags[$i],$attribute[$i],$page);
	}
	$page = edit_forms($page);
	//this will get all/most the css out of the html and edit it
	$page = preg_replace_callback('!(<style[^>]*?>)(.*?)(</style>)!is', 'edit_style_tag_callback', $page);
} else {
	//the page is css, not html
	header('Content-type: text/css');
	$page = edit_css($page);
}


// send the page to browser
echo $page;



//----------Functions-------------


/*
*---This is the function to edit the html: it finds every <$tag ...>
*---that carries $attribute and hands the attribute value to
*---edit_tags_callback() so the url can be pointed at this page
*---
*---$tag        tag name without brackets (eg. 'img')
*---$attribute  attribute of that tag that holds the url (eg. 'src')
*---$html       the html to work on
*---returns the html with the matching attribute values rewritten
*---
*---the value ends at the first whitespace or '>', so < and > are not
*---allowed inside any attribute of the tag
*/
function edit_tags($tag, $attribute, $html){
	//groups: 1 = everything up to and including the opening quote,
	//2 = tag name, 3 = attribute value, 4 = rest of the tag
	$no_http_pattern = '!(<('.preg_quote($tag, '!').')\s+[^<>]*?'
		.preg_quote($attribute, '!')
		.'\s*=\s*\'?"?)([^<>]*?)(\'?"?>|\'?"?\s[^<>]*?>)!is';
	$html = preg_replace_callback($no_http_pattern, 'edit_tags_callback', $html);
	return $html;
}

/*
*--This function will search css and edit the url's in it
*--two forms are handled:
*--   url(http://example.com/image.png)   with or without quotes
*--   @import "http://example.com/a.css"; the quoted string form
*--each url is handed to edit_css_callback()
*--
*--$css  the style sheet text (a whole file or the body of a <style> tag)
*--returns the css with the url's rewritten
*/
function edit_css ($css){
	//url( ... ), optionally quoted
	$pattern = '!(url\s*?\(\s*?\'?"?)(.*?)("?\'?\s*?\))!is';
	$css = preg_replace_callback($pattern, 'edit_css_callback',$css);
	//@import with a quoted string; the @ and the quote are required so
	//that "!important;" is left alone and "@import url(...)", which the
	//first pattern already rewrote, is not rewritten a second time
	$pattern = '~(@import\s*["\'])([^"\']*)(["\'])~i';
	$css = preg_replace_callback($pattern, 'edit_css_callback',$css);
	return $css;
}

/*
*--This function will turn a url found in a page into one that
*--points at this page, with the real (absolute) url urlencoded
*--in the $url_var_name variable
*--
*--$old_url  the url as written in the page: absolute (http, https, ftp),
*--          protocol relative (//host/path), host relative (/path) or
*--          relative to the directory of the current page
*--$form     false: return the finished url as a string
*--          true:  return array(url of this page, urlencoded target);
*--                 a form needs the target in a hidden field because it
*--                 can't be encoded in the action attribute
*--
*--when $form is false, references that can not be fetched through this
*--page (#anchor, javascript:, mailto:, data:, about:, tel:) are returned
*--as they are
*--
*--uses the globals $url_parts and $path_parts set by set_location()
*--/blah/../ segments are not collapsed
*/
function edit_url($old_url, $form){
	// make some variables global
	global $this_page;
	global $url_parts;
	global $path_parts;
	global $url_var_name;

	//the url comes from html source, so undo the entity for &
	$old_url = str_replace('&amp;', '&', $old_url);

	if (!$form && preg_match('/^\s*(#|javascript:|mailto:|data:|about:|tel:)/i', $old_url)){
		return $old_url;
	}

	//scheme://host[:port] of the page that is being mirrored
	$site_root = $url_parts['scheme'].'://'.$url_parts['host'];
	if (isset($url_parts['port'])){
		$site_root .= ':'.$url_parts['port'];
	}

	if (preg_match('/^(https?|ftp):\/\//i', $old_url)){
		//already absolute
		$absolute_url = $old_url;
	} elseif (strncmp($old_url, '//', 2) === 0){
		//protocol relative: only the scheme is missing
		$absolute_url = $url_parts['scheme'].':'.$old_url;
	} elseif (strncmp($old_url, '/', 1) === 0){
		//relative to the root of the site
		$absolute_url = $site_root.$old_url;
	} else {
		//relative to the directory of the current page
		$absolute_url = $site_root.$path_parts['dirname'].'/'.$old_url;
	}

	$encoded_url = urlencode($absolute_url);
	if (!$form){
		return $this_page."?$url_var_name=".$encoded_url;
	}
	return array($this_page, $encoded_url);
}


/*
*--This is a function to edit the form tag because
*--the url can't be encoded in the action attribute
*--every <form ... action=...> tag is handed to edit_forms_callback()
*--forms without an action attribute are left untouched
*--
*--$html  the html to work on
*--returns the html with the form tags rewritten
*/
function edit_forms($html){
	//groups: 1 = tag up to and including the opening quote of action,
	//2 = the action url, 3 = rest of the tag
	$form_pattern = '!(<form\s+[^<>]*?action\s*=\s*\'?"?)([^<>]*?)(\'?"?>|\'?"?\s[^<>]*?>)!is';
	$html = preg_replace_callback($form_pattern, 'edit_forms_callback', $html);
	return $html;
}

/*
*--This is the function that will read the headers (cURL calls it
*--once for every header line it receives)
*--any location header it finds will be placed in $location
*--any content type header will be placed in $content_type_header
*--any set-cookie header is handed to add_cookie()
*--
*--the header name is matched at the start of the line, so headers that
*--merely contain one of these names (Content-Location,
*--X-Content-Type-Options, ...) are ignored
*--
*--$ch      the cURL handle (not used)
*--$string  one raw header line, including its line ending
*--returns the length of $string, which tells cURL the line was handled
*/
function read_header($ch, $string){
	//headers to find
	global $content_type_header;
	global $location;
	$length = strlen($string);
	if (preg_match('/^Location:\s*(.*?)\s*$/i', $string, $match)){
		$location = $match[1];
	} elseif (preg_match('/^Content-Type:/i', $string)){
		//kept as the raw line; read_data() and the main script search in it
		$content_type_header = $string;
	} elseif (preg_match('/^Set-Cookie:/i', $string)){
		//the second argument is the number of ;-separated parts
		add_cookie($string, substr_count($string, ';') + 1);
	}
	return $length;
}

/*
*--This is the function to read all the data from curl (cURL calls
*--it once for every chunk of the body)
*--if the page isn't html or css it will immediately start sending it
*--to the browser, after passing the content type header on once
*--if the page is html or css it will store it in the $page variable
*--
*--$ch      the cURL handle (not used)
*--$string  one chunk of the response body
*--returns the length of $string, which tells cURL the chunk was handled
*/
function read_data ($ch, $string){
	global $content_type_header,$page,$data_started;
	$length = strlen($string);
	if (preg_match('/text\/(html|css)/i',$content_type_header)){
		$page .= $string;
	} else {
		if (!$data_started){
			//the stored header is the raw line with its line ending;
			//header() refuses values that contain a new line, so trim it
			$header_line = trim($content_type_header);
			if ($header_line !== ''){
				header($header_line);
			}
			$data_started = TRUE;
		}
		echo $string;
	}
	return $length;
}

/*
*--This function records the url of the page being mirrored so the
*--other functions can turn relative url's into absolute ones
*--
*--it fills two globals:
*--   $url_parts   the result of parse_url()
*--                ['scheme'] = protocol (without ://), ['host'] = site
*--   $path_parts  ['dirname'] = directory of the page, '' for the root
*--                (when there is a path the other pathinfo() keys are
*--                 set too, but the file name has 'blah' appended)
*--
*--$url  the absolute url of the page
*/
function set_location ($url){
	global $url_parts, $path_parts;

	$url_parts = parse_url($url);

	if (!isset($url_parts['path'])){
		//no path at all (eg. http://example.com): same as the root
		$path_parts['dirname'] = '';
	} else {
		//'blah' is appended so that a path ending in a directory
		//(/dir/) is not taken for a file by pathinfo()
		$path_parts = pathinfo($url_parts['path'].'blah');
	}

	//the root is stored as an empty string so that
	//dirname.'/'.file never starts with a double slash
	if ($path_parts['dirname']=='/'){
		$path_parts['dirname'] = '';
	}
}

/*
*--This function will take a Set-Cookie header and add the cookie to
*--the $_SESSION variable, stored as
*--   $_SESSION['cookies'][domain][name] =
*--       array('value' => ..., 'path' => ..., 'expires' => ...)
*--
*--the first name=value pair is the cookie itself, everything after it
*--is an attribute; attribute names are matched without regard to case
*--and attributes without a value (HttpOnly, Secure) are skipped
*--domain defaults to the host of the current page, path to '/', and
*--expires to NULL (kept until the session ends)
*--
*--$string  the raw header line, "Set-Cookie: name=value; attr=value..."
*--$num     number of ;-separated parts; no longer needed and only kept
*--         so existing callers keep working
*/
function add_cookie($string, $num){
	global $url_parts;

	//cut off the header name; ignore anything that is not a Set-Cookie line
	if (!preg_match('/^Set-Cookie:\s*(.*)$/is', trim($string), $header)){
		return;
	}
	$pieces = explode(';', $header[1]);

	//the cookie itself
	$name_and_value = explode('=', array_shift($pieces), 2);
	$cookie_name = trim($name_and_value[0]);
	if ($cookie_name === '' || !isset($name_and_value[1])){
		return;
	}
	$cookie_value = trim($name_and_value[1]);

	//defaults for the attributes
	$domain = isset($url_parts['host']) ? $url_parts['host'] : '';
	$path = '/';
	$expires = NULL;

	foreach ($pieces as $piece){
		$attribute_pair = explode('=', $piece, 2);
		if (!isset($attribute_pair[1])){
			continue;//flag without a value
		}
		$attribute_name = strtolower(trim($attribute_pair[0]));
		$attribute_value = trim($attribute_pair[1]);
		if ($attribute_value === ''){
			continue;
		}
		if ($attribute_name == 'domain'){
			$domain = $attribute_value;
		} elseif ($attribute_name == 'path'){
			$path = $attribute_value;
		} elseif ($attribute_name == 'expires'){
			//a date that can't be parsed leaves the cookie without expiry
			$timestamp = strtotime($attribute_value);
			if ($timestamp !== false){
				$expires = $timestamp;
			}
		}
	}

	//now put the cookie into the session variable
	$_SESSION['cookies'][$domain][$cookie_name] = array ('value' => $cookie_value, 'path' => $path, 'expires' => $expires);
}

/*
*--This function deletes the cookies that have expired
*--it walks $_SESSION['cookies'][domain][name] and removes every
*--cookie whose 'expires' time stamp lies in the past; a cookie
*--without an expiry (a value that compares equal to NULL) is kept
*/
function clean_cookies (){
	foreach (array_keys($_SESSION['cookies']) as $host_key){
		//work on a copy of the jar so entries can be removed while looping
		$jar = $_SESSION['cookies'][$host_key];
		foreach ($jar as $name => $entry){
			$expiry = $entry['expires'];
			if ($expiry == NULL){
				continue;//no expiry recorded
			}
			if ($expiry < time()){
				unset($_SESSION['cookies'][$host_key][$name]);
			}
		}
	}
}

/*
*--This function generates the cookie data to send to the server
*--a stored cookie is sent when
*--   its domain matches the host of the current page: a domain with a
*--   leading dot (.example.com) matches example.com and every
*--   subdomain, any other domain must equal the host exactly
*--   (the comparison ignores case), and
*--   its path is a prefix of the directory of the current page
*--when two domains hold a cookie of the same name only the first is sent
*--
*--uses the globals $url_parts and $path_parts set by set_location()
*--returns the cookies as "name=value; name2=value2", '' when there are none
*/
function read_cookies(){
	global $url_parts,$path_parts;
	$host = strtolower($url_parts['host']);
	$request_path = $path_parts['dirname'].'/';
	//name => "name=value", the keys make sure each name is sent once
	$cookie_pairs = array();
	foreach ($_SESSION['cookies'] as $domain_name => $domain){
		$domain_name = strtolower((string)$domain_name);
		if (substr($domain_name, 0, 1) == '.'){
			//wildcard domain: the host itself or any subdomain of it
			$domain_matches = ($host === substr($domain_name, 1))
				|| (substr($host, -strlen($domain_name)) === $domain_name);
		} else {
			$domain_matches = ($host === $domain_name);
		}
		if (!$domain_matches){
			continue;
		}
		foreach ($domain as $cookie_name => $cookie){
			$cookie_path = (string)$cookie['path'];
			if ($cookie_path !== '' && strncmp($request_path, $cookie_path, strlen($cookie_path)) !== 0){
				continue;//cookie belongs to another part of the site
			}
			if (isset($cookie_pairs[$cookie_name])){
				continue;//a cookie of this name is already being sent
			}
			$cookie_pairs[$cookie_name] = "$cookie_name={$cookie['value']}";
		}
	}
	return implode('; ', $cookie_pairs);
}

//functions for preg_replace callback


/*
*--preg_replace_callback handler for edit_tags()
*--$matches: 0 = whole tag, 1 = tag up to the attribute value,
*--2 = tag name, 3 = attribute value, 4 = rest of the tag
*--a value listed in $stop_url for this tag is not a url, so the tag is
*--returned untouched; otherwise the value is replaced by the html
*--escaped result of edit_url()
*/
function edit_tags_callback($matches){
	global $stop_url;
	$tag_name = strtolower($matches[2]);
	$is_stop_value = isset($stop_url[$tag_name])
		&& in_array(strtolower($matches[3]), $stop_url[$tag_name]);
	if ($is_stop_value){
		return $matches[0];
	}
	$rewritten = htmlspecialchars(edit_url($matches[3],0));
	return $matches[1].$rewritten.$matches[4];
}

/*
*--preg_replace_callback handler for <style> blocks
*--$matches: 1 = opening tag, 2 = the css, 3 = closing tag
*--the css is run through edit_css(), the tags are kept as they are
*/
function edit_style_tag_callback($matches){
    $rewritten_css = edit_css($matches[2]);
    return $matches[1].$rewritten_css.$matches[3];
}

/*
*--preg_replace_callback handler for edit_forms()
*--$form_match: 1 = form tag up to the action value, 2 = the action
*--url, 3 = rest of the tag
*--the action is pointed at this page and the real target is added
*--as a hidden field right behind the form tag
*/
function edit_forms_callback($form_match){
    global $url_var_name;
    //edit_url() in form mode returns array(action, urlencoded target)
    $rewritten = edit_url($form_match[2],1);
    $action = $rewritten[0];
    $url_input = $rewritten[1];
    $input_tag = sprintf(
        '<input type="hidden" name="%s" value="%s">',
        $url_var_name,
        htmlspecialchars(urldecode($url_input))
    );
    return $form_match[1].$action.$form_match[3].$input_tag;
}

/*
*--preg_replace_callback handler for edit_css()
*--$matches: 1 = text before the url, 2 = the url, 3 = text after it
*--only the url is replaced, by the result of edit_url()
*/
function edit_css_callback($matches){
    $rewritten_url = edit_url($matches[2],0);
    return $matches[1].$rewritten_url.$matches[3];
}


?>