Version: 2.037
Type: Full Script
Category: HTTP
License: GNU General Public License
Description: Grabs news from Detikcom, Indonesia’s foremost online news site, and reformats it into a usable design. It can also operate as a proxy for other detik.usable nodes, supports caching, and is easily extended with other functions.
<?php
/**
 * detik.usable: a fast-download, single-file reformatter for Detikcom news pages.
 *
 * Request parameters (all optional):
 *   x=i or no=frame  headline list (no=frame renders links without a frame target)
 *   x=w              welcome page
 *   x=s              highlighted source of this script
 *   url=...          one news article
 *   as_node=1        emit the parsed array serialized (gzcompress'ed unless uc=1)
 *   au=1 / cm=1      update check / cache management; add commit=1 to perform it
 *   (none)           frameset
 *
 * Author: dody suria wijaya - [email protected]
 * License: THIS IS A PUBLIC DOMAIN CODE (you may even change the author -- see "Configuration").
 * Term of Usage: BY USING THIS SCRIPT, YOU TAKE FULL RESPONSIBILITY OF ANY LEGAL ACTION THAT MAY BE TAKEN.
 *
 * NOTE(review): the published listing this was recovered from had lost every
 * backslash and most URL literals. Escapes have been restored from context.
 * URL literals that could not be recovered (the empty "href=" targets,
 * proxy_url, update_url, e-mail addresses) are left exactly as found and must
 * be filled in by the deployer.
 */

// The update checker greps the remote copy for the version assignment below,
// so keep it in exactly this form (double-quoted value).
$app = array();
$app['name'] = "detik.usable";
$app['version'] = "2.037";

// CONFIGURATION
$wp_author = "<a href=mailto:[email protected]>dody suria wijaya</a>";
$app['proxy_mode'] = FALSE;     //set this to TRUE to get data from other detik.usable nodes
$app['proxy_url'] = "";         //set this to other detik.usable node
$app['ads'] = TRUE;             //set this to TRUE to display advertisement
$app['hosted_by'] = get_current_user();
$app['zlib_support'] = extension_loaded('zlib');
$app['update_url'] = "";
$app['cache'] = TRUE;

// VARIABLE DEFINITIONS
$hari = array('Minggu','Senin','Selasa','Rabu','Kamis',"Jum'at","Sabtu");
$bulan = array('','Januari','Februari','Maret','April','Mei','Juni','Juli','Agustus','September','Oktober','November','Desember');
// The dash is emitted as an entity because the pages declare no charset.
$tgl_lengkap = $hari[date("w")].", ".date("j")." ".$bulan[date("n")].date(" Y")."&mdash;".date("H:i")." WIB";

// SCRIPT_NAME (unlike PHP_SELF) carries no client-supplied PATH_INFO; both values
// below end up inside HTML attributes, so they are escaped once here.
$self = htmlspecialchars($_SERVER['SCRIPT_NAME'], ENT_QUOTES);
$query_string = htmlspecialchars(isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '', ENT_QUOTES);

$x = request_param('x');
$url = request_param('url');
$as_node = request_param('as_node');    //client request 1 to ask for serialized news array (default: compressed)
$uncompressed = request_param('uc');    //client set this to 1 when requesting uncompressed stream
$no = request_param('no');

// NOTE(review): this literal was empty in the listing; restored from the Referer
// header sent by sock_send_request() -- confirm.
$hostname = "www.detik.com";
$location = "";

$fp = NULL;
$log = array();
$news = array();
$error = FALSE;
$stream_compress = FALSE;
$new_features = "";

$header_ouput = "<html><head><title>detik.usable: berita cepat ($tgl_lengkap)</title><style>body{font-family:verdana;}.o{font-size:11pt;}.p{font-size:13pt;}h1{font-family:georgia;font-size:18pt;align:center;}.s{color:#991111;font-weight:bold;}</style></head><body bgcolor=#ffffff>";

$list_header_output = "<html><head><title>detik.usable: berita cepat ($tgl_lengkap)</title> <style>body{font-family:verdana,arial;}.i{font-size:10pt;font-weight:bold;font-family:verdana;}.j{font-family:arial;font-size:12pt;font-weight:bold;}.u{font-size:10pt;}.s{color:#991111;font-weight:bold;}a{text-decoration:none;}a:hover{text-decoration:underline;} .button {font-size:10pt;background:#D6E7EF;border-bottom:1px solid #104A7B;border-right:1px solid #104A7B;border-left: 1px solid #AFC4D5;border-top:1px solid #AFC4D5;color:#000066;margin:2;}.d{font-size:smaller;color:#555}#footer {font-size:smaller;clear:both;border:none;background:#e3ebe2;margin-top:20px;padding-left:30px;padding-top:10px;padding-bottom: 10px;}#footer a:link{color:#666666;} #footer a:active,.footer a:hover{color:#006486;}#footer a:visited{color:#949494;}</style></head><body bgcolor=#ffffff>";

$list_top_output = "<h3><a href= title=Home target=_top><span style=color:#991111;>detik</span>.<span style=color:#119911;>usable</span></a> @ <a href=\"$self?$query_string\" class=button>REFRESH</a><br>$tgl_lengkap</h3>";

// Kept for reference; nothing in this script prints it.
$error_cant_open = "<p>Unable to connect to Detikcom's server. This can be caused by this problems: <ul> <li>This webserver's IP has been blocked by Detikcom <li>Your webserver is behind firewall <li>Your PHP's setting has disabled socket connection-related functions <li>Detikcom is being swarmed by huge requests and really really busy <li>Detikcom's URL/port has been changed </ul> <p>What ever is the caused, I may not able to help you with this. Thank you. <p><a href=>Visit the original</a><br>";

$frameset_output = "<html><head><title>detik.usable: berita cepat ($tgl_lengkap)</title></head> <frameset cols=\"50%,*\"> <frame name=c target=m src=\"$self?x=i\"> <frame name=m target=_top src=\"$self?x=w\"> <noframes> <body>Looks like u need the <a href=$self?no=frame>non-frame version</a>.</body> </noframes> </frameset></html>";

$welcomepage_output = "<center><h1><a href= title=\"detik.usable home\" target=_top style=text-decoration:none;> <span style=color:#991111;>detik</span>.<span style=color:#119911;>usable</span></a>: berita <i>cepat</i></h1> <p>Version {$app['version']}<p>dipersembahkan oleh $wp_author</p><p>Produk dari <a href=mailto:[email protected]>dsw s/h</a></p></center> <hr><p>Situs ini ditujukan untuk mendemonstrasikan 'look and feel' dari potensi sebuah situs berita yg usable: cepat, bersih, dan mudah digunakan.<p>Bagi yang ingin nge-detik.usable, bisa copy n paste <a href=\"$self?x=s\" target=_top>source code situs ini</a> (public domain dan cuman 1 file) dan pasang di hosting apapun yg mendukung php, <b>untuk keperluan anda sendiri</b><ul><li><a href=$self?no=frame target=_top>Non-framed version</a> untuk pembenci frame<li><!--<a href=$self?au=1>Check update</a> versi terbaru--><li><a href=$self?cm=1>Pengaturan Cache</a></ul> $new_features</body></html>";

// FUNCTIONS

// Return request parameter $name as a string, or NULL when absent (or not a string).
function request_param ($name)
{
    return (isset($_REQUEST[$name]) && is_string($_REQUEST[$name])) ? $_REQUEST[$name] : NULL;
}

// Append one HTML-escaped entry to the debug log.
function add_log ($string)
{
    global $log;
    $log[] = "<li>".htmlspecialchars($string);
}

// Print the debug log and the parsed news array, then terminate the request.
function dump_log ()
{
    global $log,$news;
    echo "<div style='border:thin solid #ffaaaa;background-color:#ffcccc;font-size:x-small;'><ul>".implode("\r\n",$log)."</ul></div>";
    echo "<pre>";
    print_r($news);
    echo "</pre>";
    exit;
}

// Read from the open socket: skip lines until one matches $pattern_start, then
// collect lines until one matches $pattern_end. Returns the collected text.
function newsdetail_fetch ($pattern_start,$pattern_end)
{
    global $fp;
    $buffers = "";
    while (!feof($fp)) {    //skip non-content to make regmatching later much faster
        $buffer = fgets($fp, 65536);
        if (preg_match($pattern_start,$buffer)) break;
    }
    while (!feof($fp)) {    //start collecting data until designated sign found
        $buffer = fgets($fp, 65536);
        if (preg_match($pattern_end,$buffer)) break;
        $buffers .= $buffer;
    }
    return $buffers;
}

// Open a TCP connection into the global $fp. Returns TRUE on success; on
// failure the log is dumped and the request ends (dump_log() exits).
function socket_open ($host,$port)
{
    global $fp;
    $timeout = 30; //seconds
    add_log("sock_open: $host/$port ($timeout s timeout)");
    // Warnings are suppressed because the failure is reported through $errno/$errstr below.
    $fp = @fsockopen($host,$port,$errno,$errstr,$timeout);
    if ($fp) {
        add_log("sock_open: connected");
        return TRUE;
    }
    add_log("sock_open: can't connect");
    if ($errno == 0) {
        add_log("sock_open: problem before connect (dns/socket)");
    } else {
        add_log("sock_open: problem trying to connect (hostname notfound, blocked, downed, busy, or timeout)");
        write_error("$errno \"$errstr\".");
    }
    dump_log();
    return FALSE;
}

// Send an HTTP/1.0 GET for $location on the open socket. Returns TRUE when written.
function sock_send_request ($hostname,$location)
{
    global $fp;
    // Both values can originate from the url parameter: drop CR/LF so they
    // cannot inject extra request headers.
    $hostname = preg_replace('/[\r\n]+/', '', $hostname);
    $location = preg_replace('/[\r\n]+/', '', $location);
    $http_req = "GET $location HTTP/1.0\r\nHost: $hostname\r\nReferer: http://www.detik.com\r\nConnection:close\r\n\r\n";
    add_log("http_req: $http_req");
    if (fputs($fp, $http_req) === FALSE) {  // fputs() signals failure with FALSE
        add_log("http_req: can't send");
        return FALSE;
    }
    add_log("http_req: sent");
    return TRUE;
}

// Read the response header block up to the blank line. Returns the header
// text; anything other than "200 OK" prints an error and ends the request.
function sock_recv_header ()
{
    global $fp;
    $buffers = "";
    add_log("http_resp_header: receiving...");
    while (!feof($fp)) {
        $buffer = fgets($fp, 65536);
        if ($buffer == "\r\n") break;
        $buffers .= $buffer;
    }
    add_log("http_resp_header: $buffers");
    if (!preg_match("/200 OK/",$buffers)) {
        write_error("Invalid HTTP Response");
        dump_log();
        return FALSE;
    }
    add_log("http_resp_header: 200 OK");
    return $buffers;
}

// Print an error line and remember that an error happened (enables dump_buffer()).
function write_error ($string)
{
    global $error;
    echo "<p><font color=red><b>ERROR:</b><!--begin-->$string<!--end--></font>";
    $error = TRUE;
}

// Print $buffers as a "core dump", but only after write_error() has been called.
function dump_buffer ($buffers,$title = "")
{
    global $error;
    if ($error) {
        echo "<p>Core dump $title...</p>\n<pre><!--Start Dump-->\n".$buffers."\n<!--Stop Dump--></pre>\n";
    }
}

// Format the age of timestamp $date_c as seconds ("detik"), minutes ("menit") or hours ("jam").
function str_time_delta ($date_c)
{
    $delta = time() - $date_c;
    if ($delta < 60) {
        $satuan = "detik";
        $tgl = $delta;
    } elseif ($delta < 3600) {
        $satuan = "menit";
        $tgl = round($delta/60);
    } else {
        $satuan = "jam";
        $tgl = sprintf("%01.1f",$delta/3600);
    }
    return "$tgl $satuan";
}

// Run $regex on $subject and log the outcome as "parser: $label: ...".
// Returns the match array, or FALSE when nothing matched.
function parser_match ($label,$regex,$subject)
{
    if (!preg_match($regex,$subject,$res)) {
        add_log("parser: $label: fail ($regex)");
        return FALSE;
    }
    add_log("parser: $label: success");
    return $res;
}

// Turn a URL ending in /YYYYMMDD-HHMMSS.shtml into a timestamp; FALSE when it does not match.
function url_to_timestamp ($url)
{
    if (!preg_match('/\/(\d\d\d\d)(\d\d)(\d\d)-(\d\d)(\d\d)(\d\d).shtml$/i',$url,$tgl)) return FALSE;
    return mktime((int)$tgl[4],(int)$tgl[5],(int)$tgl[6],(int)$tgl[2],(int)$tgl[3],(int)$tgl[1]);
}

// Make a front-page link absolute and, for links shaped like "...?url=http://...",
// return the parameter value instead. $label only tags the log entries.
function normalize_news_url ($label,$url)
{
    global $hostname;
    if (!preg_match('/http:\/\//',$url)) {
        add_log("parser: $label: url: add absolute url");
        // NOTE(review): the prefix literal was empty in the listing; rebuilt from $hostname -- confirm.
        $url = "http://$hostname".(substr($url,0,1) == "/" ? "" : "/").$url;
    }
    if (preg_match('/\?url=(.*)/',$url,$url_res)) {
        add_log("parser: $label: url: get from param");
        $url = $url_res[1];
    }
    return $url;
}

// Collect advertisement links from the raw page into $news['ads'] (when ads are enabled).
function ads_parse (&$buffers_orig)
{
    global $app,$news;
    if (!$app['ads']) return;
    if (!preg_match_all("|<a([^>]*)>(.*?)</a>|is",$buffers_orig,$ads_res,PREG_SET_ORDER)) {
        add_log("parser: ads: 1: fail");
        dump_log();
    }
    add_log("parser: ads: 1: success");
    foreach ($ads_res as $ad) {
        if (!preg_match('|href="([^"]*)"|is',$ad[1],$url_res)) continue;
        // Only links whose URL contains the ad.detik host are taken.
        // NOTE(review): the pattern was empty in the listing; restored from the
        // original comment ("hanya url dengan hostname ad.detik yg diambil") -- confirm.
        if (!preg_match('|ad\.detik|is',$url_res[1])) continue;
        $name = trim(strip_tags($ad[2]));
        if ($name == "" && preg_match("|/[^-]*-([^/]*).ad|i",$ad[1],$adsname_res)) {
            $name = $adsname_res[1];    //get name from url
        }
        $news['ads'][] = array('url' => $url_res[1], 'name' => $name);
    }
}

// Print the floating advertisement box for $news['ads'], if any.
function ads_view (&$news)
{
    global $app;
    if (!$app['ads'] || empty($news['ads']) || !is_array($news['ads'])) return;
    echo "<table align=right bgcolor=#B4D0DC border=0 cellspacing=0 width=100><tr><td><table border=0 cellpadding=3 cellspacing=0 width=100%><tr><td bgcolor=#ECF8FF>";
    echo "<p class=u><span class=i>Iklan</span>";
    foreach ($news['ads'] as $ads) {
        $url = $ads['url'];
        $desc = $ads['name'];
        if (strlen($desc) > 10) $desc = substr($desc,0,10).">";
        if ($desc == "") $desc = "Iklan";
        echo "<br><a href=\"$url\" target=m>$desc</a>";
    }
    echo "</td></tr></table></td></tr></table>";
}

// Build the page footer. $temp_orig is the already-escaped URL of the original
// page; $temp_stream is the "Stream: ..." status text.
function list_footer_html ($temp_orig,$temp_stream)
{
    global $app;
    $temp_pm = "Using Proxy: ".($app['proxy_mode'] ? 'Yeah' : 'Nope');
    $temp_zlib = "Compression Support: ".($app['zlib_support'] ? 'Yeah' : 'Nope');
    return "<div id=footer><a href=mailto:[email protected] target=_top>author</a> | <a href=\"$temp_orig\" target=_top>Original page</a> | Generated by <a href= target=_top>{$app['name']} v{$app['version']}</a><BR><small>Host: {$app['hosted_by']} | $temp_pm | $temp_zlib | $temp_stream</small></div></body></html>";
}

// Describe how the data reached this node, for the footer.
function stream_status ()
{
    global $app,$stream_compress;
    if (!$app['proxy_mode']) return "Stream: N/A";
    return $stream_compress ? "Stream: Compressed" : "Stream: Uncompressed";
}

// Render one list entry: "(HH:MM) subtitle - title", linked back to this script.
function news_item_html ($headline,$target,$class)
{
    global $self;
    // The article URL travels inside our own query string, so it must be encoded.
    $href = $self."?url=".urlencode(isset($headline['url']) ? $headline['url'] : "");
    $label = isset($headline['title']) ? $headline['title'] : "";
    if (isset($headline['subtitle']) && $headline['subtitle'] != "") {
        $label = $headline['subtitle']." - ".$label;
    }
    $date = isset($headline['date']) ? date('H:i',$headline['date']) : "?";
    return "<span class=d>($date)</span> <span class=$class><a href=\"$href\"$target>$label</a></span>";
}

// Print the headline list page: ads, headlines with summary, previous news, topics, footer.
function news_list_view (&$news)
{
    global $list_header_output,$list_top_output,$no,$location,$hostname;
    $target = ($no == "frame") ? "" : " target=m";
    echo $list_header_output;
    echo $list_top_output;
    ads_view($news);

    // view headlines
    if (isset($news['headline']) && is_array($news['headline'])) {
        foreach ($news['headline'] as $headline) {
            echo "<p>".news_item_html($headline,$target,"j");
            if (isset($headline['date'])) {
                // Age is taken from the real timestamp, so items from before midnight stay correct.
                echo " <span class=d>[".str_time_delta($headline['date'])." lalu]</span>";
            }
            $summary = isset($headline['summary']) ? $headline['summary'] : "";
            echo "<br><span class=u>$summary</span>";
        }
    }

    // view prevnews
    echo "<br>";
    if (isset($news['prevnews']) && is_array($news['prevnews'])) {
        foreach ($news['prevnews'] as $headline) {
            echo "<br>".news_item_html($headline,$target,"i");
        }
    }

    // view topic news
    if (isset($news['topic']) && is_array($news['topic'])) {
        foreach ($news['topic'] as $topic) {
            echo "<p><span class=i>{$topic['title']}</span>";
            if (empty($topic['news']) || !is_array($topic['news'])) continue;
            foreach ($topic['news'] as $headline) {
                echo "<BR>".news_item_html($headline,$target,"i");
            }
        }
    }

    // view footer
    $temp_orig = htmlspecialchars("http://$hostname".($location != "" ? $location : "/"), ENT_QUOTES);
    echo list_footer_html($temp_orig, stream_status());
}

// Print one article page: title block, ads, content, footer.
function news_detail_view (&$news)
{
    global $list_header_output,$url,$self;
    $subtitle = isset($news['subtitle']) ? $news['subtitle'] : "";
    $title = isset($news['title']) ? $news['title'] : "";
    $reporter = isset($news['reporter']) ? $news['reporter'] : "";
    $content = isset($news['content']) ? $news['content'] : "";

    //specialized first paragraph
    $content = preg_replace("|<B>(.*?)<P>|is","<span style=font-size:larger><B>\\1</span><P>",$content);
    //route related-news links back through this script
    $content = preg_replace('|<a href=("?)|is','<a href=${1}'.$self.'?url=',$content);

    echo $list_header_output;
    echo "<h3>$subtitle $title</h3>";
    echo "<p class=u>$reporter</p>";
    ads_view($news);
    echo "<span class=u>".$content."</span>";

    // view footer
    echo list_footer_html(htmlspecialchars((string)$url, ENT_QUOTES), stream_status());
}

// unserialize() that refuses to instantiate objects where PHP supports that option (7.0+).
function safe_unserialize ($data)
{
    if (version_compare(PHP_VERSION,'7.0.0','>=')) {
        return unserialize($data, array('allowed_classes' => FALSE));
    }
    return unserialize($data);
}

// Emit $news for another node: serialized, gzcompress'ed unless zlib is missing or uc=1 was sent.
function emit_serialized (&$news)
{
    global $app,$uncompressed;
    // Old runtimes could escape NUL bytes in output; the setting no longer exists from PHP 5.4.
    if (version_compare(PHP_VERSION,'5.4.0','<')) ini_set('magic_quotes_runtime','0');
    $news_serial = serialize($news);
    if (!$app['zlib_support'] or $uncompressed) echo $news_serial;
    else echo gzcompress($news_serial);
}

// Download and decode a news array from the configured proxy node.
// $query is the query string to append, $label prefixes the messages.
// Returns the array, or FALSE when the download failed; undecodable data ends the request.
function fetch_from_node ($query,$label)
{
    global $app,$stream_compress;
    $node_url = $app['proxy_url'].$query;
    if (!$app['zlib_support']) $node_url .= "&uc=1"; //ask uncompressed stream if i don't support zlib library
    $buffer = file_get_contents($node_url);  // reads the whole response, not just the first chunk
    if ($buffer === FALSE || $buffer == "") {
        write_error("$label: from node: Unable to download from node");
        return FALSE;
    }
    if ($app['zlib_support']) {
        // The node may legitimately answer uncompressed, so a failure here is expected and silenced.
        $inflated = @gzuncompress($buffer);
        if ($inflated === FALSE) {
            add_log("$label: from node: unable to uncompress data");
        } else {
            $buffer = $inflated;
            $stream_compress = TRUE;
        }
    }
    // NOTE(review): this still deserializes data from a remote host; only point proxy_url at a node you trust.
    $data = safe_unserialize($buffer);
    if ($data === FALSE) {
        write_error("$label: from node: Unable to unserialize data");
        exit;
    }
    if (!is_array($data)) {
        write_error("$label: from node: Data is not formatted correctly");
        exit;
    }
    return $data;
}

// Map an article URL to its cache file, or "" when the URL has no usable file name.
function cache_filename ($url)
{
    $urls = parse_url((string)$url);
    $name = (is_array($urls) && isset($urls['path'])) ? basename($urls['path']) : "";
    if ($name == "" || $name == "." || $name == "..") return "";
    return 'cache/'.$name;
}

// List the regular files inside the cache directory (empty when it does not exist).
function cache_files ()
{
    $files = array();
    if (!is_dir('cache')) return $files;
    $dh = opendir('cache');
    if (!$dh) return $files;
    while (($entry = readdir($dh)) !== FALSE) {
        if ($entry != "." && $entry != ".." && is_file('cache/'.$entry)) $files[] = 'cache/'.$entry;
    }
    closedir($dh);
    return $files;
}

// Read the rest of the open socket into a string and close it.
function sock_recv_body ()
{
    global $fp;
    $buffers = "";
    while (!feof($fp)) {
        $buffers .= fgets($fp, 65536);
    }
    fclose($fp);
    return $buffers;
}

// START
$server_addr = isset($_SERVER['SERVER_ADDR']) ? $_SERVER['SERVER_ADDR'] : "";
$server_port = isset($_SERVER['SERVER_PORT']) ? $_SERVER['SERVER_PORT'] : "";
add_log("{$app['name']} v{$app['version']} starting up from $server_addr/$server_port");
if (ob_get_level() > 0) ob_end_flush();

if (isset($url)) {
    $detikusable_mode = $app['proxy_mode'] ? 'news_detail_from_node' : 'news_detail';
} elseif ($x == "i" or $no == "frame") {
    // from node: fetch the ready-to-view array from another detik.usable node;
    // otherwise fetch raw html from detik, parse it, and output the detik.usable design
    $detikusable_mode = $app['proxy_mode'] ? 'news_list_from_node' : 'news_list';
} elseif ($x == "w") {
    $detikusable_mode = 'welcome_page';
} elseif ($x == "s") {
    $detikusable_mode = 'source_code';
} elseif (request_param('au')) {
    $detikusable_mode = 'auto_update';
} elseif (request_param('cm')) {
    $detikusable_mode = 'cache_management';
} else {
    $detikusable_mode = 'frame_set';
}
add_log("mode: $detikusable_mode");

// DETIK.USABLE IN --NEWS DETAIL MODE--
if ($detikusable_mode == 'news_detail') {
    // NOTE(review): the host is taken from the url parameter, so this mode will
    // fetch from any host on port 80 that a visitor names.
    if (preg_match('/http:\/\/([^\/]*)(\/.*)/',$url,$result)) {
        $hostname = $result[1];
        $location = $result[2];
    } else {
        $location = "/peristiwa".$url;
    }

    $news_from_cache = FALSE;
    $cache_file = $app['cache'] ? cache_filename($url) : "";
    if ($cache_file != "" && is_file($cache_file)) {    //check if already in cache
        $cached = safe_unserialize((string)file_get_contents($cache_file));
        if (is_array($cached)) {    // an unreadable cache entry falls through to a fresh fetch
            $news = $cached;
            $news_from_cache = TRUE;
        }
    }

    if (!$news_from_cache && socket_open($hostname,80)) {
        sock_send_request($hostname,$location);
        sock_recv_header();
        $buffers = sock_recv_body();
        $buffers_orig = $buffers;

        $res = parser_match("newsdetail: 1","|<blockquote>(.*?)<!-- FORM|is",$buffers);
        if ($res === FALSE) add_log("parser: $buffers");
        else $buffers = $res[1];

        if (preg_match("/berita-foto/",$url)) {
            // this channel is different enough to need its own patterns
            $fields = array(
                'title'    => "|<FONT size=5>(.*?)</font>|is",
                'reporter' => "|<BR><FONT color=#ff0000 size=2>(.*?)</font>|is",
                'content'  => '|<P align="Justify">(.*)|is',
            );
            foreach ($fields as $field => $regex) {
                $res = parser_match("newsdetail: $field",$regex,$buffers);
                if ($res === FALSE) dump_buffer($buffers);
                else $news[$field] = $res[1];
            }
            // Links inside the content are rewritten once, in news_detail_view();
            // doing it here as well produced doubled "?url=" prefixes.
        } else {
            // sub-title (optional)
            $res = parser_match("newsdetail: subtitle","|<font class=.?subjudulberita.?>(.*?)</font>|is",$buffers);
            if ($res !== FALSE) $news['subtitle'] = $res[1];

            // title (required)
            $res = parser_match("newsdetail: title","|<font class=.?judulberita.?>(.*?)</font>.*$|is",$buffers);
            if ($res === FALSE) dump_log();
            $news['title'] = $res[1];

            // reporter (optional)
            $res = parser_match("newsdetail: reporter","|<font class=.?textreporter.?>(.*?)</font>|is",$buffers);
            if ($res !== FALSE) $news['reporter'] = $res[1];

            // content (required); ends at </font> or end of string
            $res = parser_match("newsdetail: content",'/<font class=.?textberita.?>(.*?)(?:<\/font>|$)/is',$buffers);
            if ($res === FALSE) {
                write_error($buffers);
                dump_log();
            }
            $news['content'] = $res[1];

            //clean html
            if (isset($news['reporter'])) $news['reporter'] = strip_tags($news['reporter'],'<b></b><i></i>');
            $news['content'] = strip_tags($news['content'],'<b></b><i></i><a></a><p></p><br>');
        }
        ads_parse($buffers_orig);
    }

    // save serialized array to file, but never cache an article that failed to parse
    if ($cache_file != "" && !$news_from_cache && !empty($news['content'])) {
        if (!is_dir('cache')) mkdir('cache',0755);
        $cache_fp = fopen($cache_file,'w');
        if ($cache_fp) {
            fwrite($cache_fp,serialize($news));
            fclose($cache_fp);
        } else {
            add_log("cache: unable to write $cache_file");
        }
    }

    if ($as_node) emit_serialized($news);
    else news_detail_view($news);
}

// DETIK.USABLE IN --NEWS LIST MODE--
if ($detikusable_mode == 'news_list') {
    $location = "/index.htm";
    if (socket_open($hostname,80)) {
        sock_send_request($hostname,$location);
        sock_recv_header();
        $buffers = sock_recv_body();
        $buffers_orig = $buffers;

        // narrowing-in to "prevnews" content
        add_log("parser: prevnews: start");
        $res = parser_match("prevnews","/=.nmkanal(.*?)<IMG(.*)/s",$buffers);
        if ($res === FALSE) {
            add_log("parser: $buffers");
        } else {
            $pn_buf = $res[1];
            $buffers = $res[2];
            $regex_prevnews_all = '/(\d+\/\d+\/\d+.*?) WIB.*?<A href="([^"]*)"[^>]*>(.*?)<\/A>/is';
            if (!preg_match_all($regex_prevnews_all,$pn_buf,$result)) {
                add_log("parser: prevnews: all: fail $regex_prevnews_all");
                dump_log();
            }
            add_log("parser: prevnews: all: success");
            $count_prevnews = min(7,count($result[0]));     // at most 7 entries
            for ($i = 0; $i < $count_prevnews; $i++) {
                $item_url = $result[2][$i];
                $title_temp = $result[3][$i];

                // prevnews->date (parsed from the url)
                $stamp = url_to_timestamp($item_url);
                if ($stamp === FALSE) {
                    add_log("parser: prevnews: date: fail");
                } else {
                    add_log("parser: prevnews: date: success");
                    $news['prevnews'][$i]['date'] = $stamp;
                }

                // prevnews->url
                $news['prevnews'][$i]['url'] = normalize_news_url("prevnews($i)",$item_url);

                // prevnews->subtitle
                if (!preg_match('/nonhlsubJudul.>(.*?)<\/span>/',$title_temp,$subtitle_res)) {
                    add_log("parser: prevnews($i): no-subtitle");
                } else {
                    add_log("parser: prevnews($i): has subtitle");
                    $news['prevnews'][$i]['subtitle'] = $subtitle_res[1];
                }

                // prevnews->title
                $regex_prevnews_title = "/nonhlJudul.>(.*)/";
                if (!preg_match($regex_prevnews_title,$title_temp,$title_res)) {
                    add_log("parser: prevnews($i): no-title ($regex_prevnews_title)");
                } else {
                    add_log("parser: prevnews($i): has title");
                    $news['prevnews'][$i]['title'] = $title_res[1];
                }
            }
        }

        // narrowing-in to headline news content
        add_log("parser: headline: start");
        $regex_headline = '|(<span class="tanggal">.*?)</td(.*)|is';
        if (!preg_match($regex_headline,$buffers,$result)) {
            add_log("parser: headline: fail ($regex_headline) - $buffers");
            dump_log();
        }
        add_log("parser: headline: success");
        $hl_buf = $result[1];
        $buffers = $result[2];
        $regex_headline_all = '|tanggal.>[^,]*,(.*?) WIB<.*?<A href="([^"]*)".*?parent.>(.*?<span class="summary">.*?</span>)|is';
        if (!preg_match_all($regex_headline_all,$hl_buf,$result)) {
            add_log("parser: headline: all: fail ($regex_headline_all)");
            dump_log();
        }
        add_log("parser: headline: all: success");
        $count_headline = min(5,count($result[0]));     // at most 5 entries
        for ($i = 0; $i < $count_headline; $i++) {
            $date = $result[1][$i];
            $title = $result[3][$i];

            // headline->url
            $news['headline'][$i]['url'] = normalize_news_url("headline($i)",$result[2][$i]);

            // headline->subtitle (optional)
            $res = parser_match("headline($i): subtitle",'/subjudul.>(.*?)<\/span/is',$title);
            if ($res !== FALSE) $news['headline'][$i]['subtitle'] = $res[1];

            // headline->title (required)
            $res = parser_match("headline($i): title",'/strJudul.>(.*?)<\/span/is',$title);
            if ($res === FALSE) dump_log();
            $news['headline'][$i]['title'] = $res[1];

            // headline->summary (required)
            $res = parser_match("headline($i): summary",'/summary.>(.*?)<\/span/s',$title);
            if ($res === FALSE) dump_log();
            $news['headline'][$i]['summary'] = $res[1];

            // headline->date: swap dd/mm to mm/dd so strtotime() reads it
            $date = preg_replace('/([0-9]*)\/([0-9]*)\//','\\2/\\1/',$date);
            $stamp = strtotime($date);
            if ($stamp !== FALSE && $stamp != -1) $news['headline'][$i]['date'] = $stamp;
        }

        // narrowing-in to topic news content
        add_log("parser: topic: start");
        add_log("parser: topic: all");
        $regex_topic_all = '/<cfoutput>(.*?)<\/cfoutput>(.*?)<\/table>/si';
        if (!preg_match_all($regex_topic_all,$buffers,$tp_buff)) {
            add_log("parser: topic: fail ($regex_topic_all)");
            dump_log();
        }
        add_log("parser: topic: success");
        $regex_topic_detail = '/90%">(.*?)<a href="([^"]*)".*?"judulhlbawah">(.*?)<\/font>/is';
        $count_topic = count($tp_buff[1]);
        for ($i = 0; $i < $count_topic; $i++) {
            // topic->title
            $news['topic'][$i]['title'] = $tp_buff[1][$i];
            if (!preg_match_all($regex_topic_detail,$tp_buff[2][$i],$tpdetail_buff)) {
                add_log("parser: topic($i): detail: fail");
                dump_log();
            }
            add_log("parser: topic($i): detail: success");
            $titles = $tpdetail_buff[3];
            $urls = $tpdetail_buff[2];
            $count_news = count($urls);
            for ($j = 0; $j < $count_news; $j++) {
                // topic->title->title
                $news['topic'][$i]['news'][$j]['title'] = $titles[$j];

                // topic->title->url: prefer the ?url= parameter, else accept a plain http url
                if (preg_match('/\?url=(.*)/',$urls[$j],$urls_res)) {
                    add_log("parser: topic($i): detail($j): url: success");
                    $news['topic'][$i]['news'][$j]['url'] = $urls_res[1];
                } elseif (preg_match("|^http://|",$urls[$j])) {
                    add_log("parser: topic($i): detail($j): url: success (2nd try)");
                    $news['topic'][$i]['news'][$j]['url'] = $urls[$j];
                } else {
                    add_log("parser: topic($i): detail($j): url: fail");
                }

                // topic->title->date (parsed from the url)
                $stamp = url_to_timestamp($urls[$j]);
                if ($stamp === FALSE) {
                    add_log("parser: topic($i): detail($j): date: fail");
                } else {
                    add_log("parser: topic($i): detail($j): date: success");
                    $news['topic'][$i]['news'][$j]['date'] = $stamp;
                }
            }
        }
        ads_parse($buffers_orig);
    }

    if ($as_node) emit_serialized($news);
    else news_list_view($news);
}

if ($detikusable_mode == 'news_list_from_node') {
    $node_news = fetch_from_node("?x=i&as_node=1","newslist");
    if ($node_news !== FALSE) news_list_view($node_news);
}

if ($detikusable_mode == 'news_detail_from_node') {
    $node_news = fetch_from_node("?url=".urlencode($url)."&as_node=1","newsdetail");
    if ($node_news !== FALSE) news_detail_view($node_news);
}

if ($detikusable_mode == 'welcome_page') {
    echo $header_ouput;
    echo $welcomepage_output;
}

if ($detikusable_mode == 'source_code') {
    // Always this file: a path built from the request could be steered to other scripts.
    show_source(__FILE__);
}

if ($detikusable_mode == 'frame_set') {
    echo $frameset_output;
}

if ($detikusable_mode == 'auto_update') {
    // NOTE(review): with commit=1 this replaces the running script with whatever
    // update_url serves, on a plain unauthenticated GET. Leave update_url empty
    // unless that is really wanted.
    $remote_source = ($app['update_url'] != "") ? file_get_contents($app['update_url']) : FALSE;
    $remote_version = "0";
    $remote_ok = FALSE;
    if ($remote_source !== FALSE && $remote_source != ""
        && preg_match('/\$app\[\'version\'\]\s*=\s*"([^"]*)"/i',$remote_source,$remote_res)) {
        $remote_version = $remote_res[1];
        $remote_ok = TRUE;  // the download at least looks like a copy of this script
    }

    if (!$remote_ok) {
        write_error("auto_update: Unable to get latest version at {$app['update_url']}");
        echo "<p><a href=$self?x=w>Back to welcome page</a>";
    } elseif (!request_param('commit')) {
        //compare version
        $remote_label = htmlspecialchars($remote_version, ENT_QUOTES);
        echo $list_header_output;
        echo "<h4>Check versi terbaru</h4>";
        echo "<ul><li>Versi detik.usable ini: <b>{$app['version']}</b><li>Versi detik.usable terbaru: <b>$remote_label</b></ul>";
        if ((float)$remote_version > (float)$app['version']) {
            echo "<p><form method=get action=$self><input type=hidden name=au value=1><input type=hidden name=commit value=1><input type=submit value=\"Update ke $remote_label\"></form>";
        } else {
            echo "<p>detik.usable ini sudah versi terbaru.";
        }
        echo "<p><a href=$self?x=w>Back to welcome page</a>";
    } else {
        $update_fp = fopen($_SERVER['SCRIPT_FILENAME'],'w');
        if (!$update_fp) {
            write_error("auto_update: Unable to write {$_SERVER['SCRIPT_FILENAME']}");
            echo "<p><a href=$self?x=w>Back to welcome page</a>";
        } else {
            fwrite($update_fp,$remote_source);
            fclose($update_fp);
            echo $list_header_output;
            echo "<h4>Update Berhasil</h4>";
            echo "<p><a href=$self target=_top>Reload detik.usable</a>";
        }
    }
}

if ($detikusable_mode == 'cache_management') {
    if (!request_param('commit')) {
        $dirsize = 0;
        foreach (cache_files() as $cached_file) {
            $dirsize += filesize($cached_file);
        }
        $cache_size = round($dirsize/1024,2);
        echo $list_header_output;
        echo "<h4>Pengaturan Cache</h4>";
        echo "Total space yang digunakan cache: ".$cache_size." KB";
        if ($cache_size > 0) {
            echo "<p><form method=get action=$self><input type=hidden name=cm value=1><input type=hidden name=commit value=1><input type=submit value=\"Kosongkan Cache\"></form>";
        }
        echo "<p><a href=$self?x=w>Back to welcome page</a>";
    } else {
        foreach (cache_files() as $cached_file) {
            if (!unlink($cached_file)) add_log("cache: unable to delete $cached_file");
        }
        echo $list_header_output;
        echo "<h4>Cache telah dikosongkan</h4>";
        echo "<p><a href=$self?x=w>Back to welcome page</a>";
    }
}