发布一个用PHP fsockopen写的HTTP下载的类

  作者:bea

如果支持打开远程内容的选项的话,实际上php用fopen或file_get_contents都能获得一个网页的内容,但是默认的函数有个不足的地方就是无法获取HTTP头,这在一些特殊的应用中很不方便,如,有一个链接: http://www.abc.com/showvd.asp?id=18 假如它返回的是一个图片,用默认函数就很难识别,但如果通过HTTP应答头来判断就简单多了,此外如果对方通过Referer来防盗链的话,也是无法获取的,用HTTP类就能完美解决这些问题,而且速度也相差无几。
如果支持打开远程内容的选项的话,实际上php用fopen或file_get_contents都能获得一个网页的内容,但是默认的函数有个不足的地方就是无法获取HTTP头,这在一些特殊的应用中很不方便,如,有一个链接:



http://www.abc.com/showvd.asp?id=18 



假如它返回的是一个图片,用默认函数就很难识别,但如果通过HTTP应答头来判断就简单多了,此外如果对方通过 Referer 请求头来防盗链的话,也是无法获取的,用HTTP类就能完美解决这些问题,而且速度也相差无几。





使用方法:



$hd = new DedeHttpDown();

$hd->OpenUrl("http://www.dedecms.com");

echo $hd->GetHtml();

//如果保存为文件则用 $hd->SaveToBin("dede.html");

$hd->Close();



获得HTTP应答头用

$hd->GetHead("key")

设置请求头

$hd->SetHead(key,value); (必须在调用 OpenUrl 之前设定)





代码如下:



<?

/* ---------------------------------------------------------------------

//织梦Http下载类V1.0

//出自:织梦之旅 http://www.dedecms.com

//作者: IT柏拉图

//时间: 2005-11-13 12:39

//声明: 首发在落伍者网站,转载请保留版权信息

--------------------------------------------------------------------- */

/**
 * Minimal HTTP GET client built on fsockopen().
 *
 * Unlike fopen()/file_get_contents() on a URL, it keeps the response headers
 * available (see GetHead()) and lets the caller set request headers (see
 * SetHead()), e.g. a Referer for hosts that use hot-link protection.
 *
 * Typical use:
 *   $hd = new DedeHttpDown();
 *   $hd->SetHead("Referer", "http://www.example.com/");   // optional, before OpenUrl()
 *   $hd->OpenUrl("http://www.example.com/page.html");
 *   echo $hd->GetHtml();          // or $hd->SaveToBin("page.html");
 *   $hd->Close();
 *
 * Response headers are stored with lower-cased names. Three pseudo headers are
 * derived from the status line: "http-edition", "http-state", "http-describe".
 */
class DedeHttpDown
{
    // Full URL of the current request.
    var $m_url = "";
    // Request target sent in the GET line: path plus "?query" when present.
    var $m_urlpath = "";
    var $m_scheme = "http";
    var $m_host = "";
    var $m_port = "80";
    var $m_user = "";
    var $m_pass = "";
    var $m_path = "/";
    var $m_query = "";
    // Socket handle while a response is open, false otherwise.
    var $m_fp = false;
    var $m_error = "";
    // Response headers, keyed by lower-cased header name.
    var $m_httphead = array();
    // Body cache filled by GetHtml().
    var $m_html = "";
    // Request headers supplied through SetHead().
    var $m_puthead = array();
    // Host plus directory of the current URL, no trailing slash (used by FillUrl()).
    var $BaseUrlPath = "";
    // Host of the current URL.
    var $HomeUrl = "";
    // Redirect counter, guards against endless redirect loops.
    var $JumpCount = 0;

    /**
     * Split $url into its components and store them on the object.
     *
     * Does nothing when $url is empty or cannot be parsed. Components that are
     * missing from the URL keep their current values.
     *
     * @param string $url Absolute URL.
     */
    function PrivateInit($url)
    {
        if($url=="") return;

        $this->m_url = $url;
        $urls = parse_url($url);
        if(!is_array($urls)) return;

        // Every component is optional in parse_url()'s result, so test before reading.
        $this->m_host = isset($urls["host"]) ? $urls["host"] : "";
        if(!empty($urls["scheme"])) $this->m_scheme = $urls["scheme"];
        if(!empty($urls["user"])) $this->m_user = $urls["user"];
        if(!empty($urls["pass"])) $this->m_pass = $urls["pass"];

        if(!empty($urls["port"])) $this->m_port = $urls["port"];
        else if($this->PrivateScheme()=="https") $this->m_port = "443";

        if(!empty($urls["path"])) $this->m_path = $urls["path"];
        $this->m_urlpath = $this->m_path;

        if(!empty($urls["query"])){
            $this->m_query = $urls["query"];
            $this->m_urlpath .= "?".$this->m_query;
        }

        $this->HomeUrl = $this->m_host;

        // Base for relative links: host + directory part of the path, i.e.
        // everything before the last "/" ("/dir/page.html" -> "/dir").
        $cut = strrpos($this->m_path,"/");
        $dir = ($cut===false) ? "" : substr($this->m_path,0,$cut);
        $this->BaseUrlPath = $this->HomeUrl.$dir;
    }

    /**
     * Request $url and read the response headers.
     *
     * Redirects (3xx with a Location header) are followed automatically.
     *
     * @param string $url Absolute http:// or https:// URL.
     * @return bool True when response headers were read (whatever the status
     *              code), false on connection/protocol failure; see $m_error.
     */
    function OpenUrl($url)
    {
        $this->PrivateReset();
        $this->JumpCount = 0;
        $this->PrivateInit($url);
        return $this->PrivateStartSession();
    }

    /**
     * Same as OpenUrl() but counts as one more redirect hop instead of
     * resetting the redirect counter.
     *
     * @param string $url Absolute URL taken from a redirect response.
     * @return bool See OpenUrl().
     */
    function JumpOpenUrl($url)
    {
        $this->PrivateReset();
        $this->JumpCount++;
        $this->PrivateInit($url);
        return $this->PrivateStartSession();
    }

    /**
     * Restore per-request state to its defaults and close any open socket.
     * Request headers set with SetHead() are deliberately kept.
     */
    function PrivateReset()
    {
        $this->m_url = "";
        $this->m_urlpath = "";
        $this->m_scheme = "http";
        $this->m_host = "";
        $this->m_port = "80";
        $this->m_user = "";
        $this->m_pass = "";
        $this->m_path = "/";
        $this->m_query = "";
        $this->m_error = "";
        $this->m_httphead = array();
        $this->m_html = "";
        $this->Close();
    }

    /**
     * Normalised scheme of the current URL: "https", or "http" for anything else.
     *
     * @return string
     */
    function PrivateScheme()
    {
        return (strtolower($this->m_scheme)=="https") ? "https" : "http";
    }

    /**
     * Echo the accumulated error text followed by all response headers (HTML).
     */
    function printError()
    {
        echo "错误信息:".$this->m_error;
        echo "具体返回头:<br>";
        if(!is_array($this->m_httphead)) return;
        foreach($this->m_httphead as $k=>$v)
        { echo "$k => $v <br>\r\n"; }
    }

    /**
     * Tell whether the response status is 2xx.
     * On failure the status code and reason are appended to $m_error.
     *
     * @return bool
     */
    function IsGetOK()
    {
        if(strncmp($this->GetHead("http-state"),"2",1)==0) return true;

        $this->m_error .= $this->GetHead("http-state")." - ".$this->GetHead("http-describe")."<br>";
        return false;
    }

    /**
     * Tell whether the response is 2xx and its Content-Type starts with "text".
     * On failure a message is appended to $m_error.
     *
     * @return bool
     */
    function IsText()
    {
        if(strncmp($this->GetHead("http-state"),"2",1)==0
            && preg_match('/^text/i',$this->GetHead("content-type")))
        {
            return true;
        }
        $this->m_error .= "内容为非文本类型或网址重定向<br>";
        return false;
    }

    /**
     * Tell whether the response is 2xx and has the given content type.
     *
     * The comparison ignores case. $ctype matches either the whole header
     * value or just its media type, so "text/html" matches
     * "text/html; charset=utf-8".
     *
     * @param string $ctype Expected content type, e.g. "image/gif".
     * @return bool
     */
    function IsContentType($ctype)
    {
        $actual = strtolower(trim($this->GetHead("content-type")));
        $wanted = strtolower(trim($ctype));

        // Media type without parameters such as "; charset=...".
        $semi = strpos($actual,";");
        $media = ($semi===false) ? $actual : trim(substr($actual,0,$semi));

        if(strncmp($this->GetHead("http-state"),"2",1)==0
            && ($actual==$wanted || $media==$wanted))
        {
            return true;
        }
        $this->m_error .= "类型不对 ".$this->GetHead("content-type")."<br>";
        return false;
    }

    /**
     * Stream the response body into a file, then close the connection.
     *
     * @param string $savefilename Destination path (created or truncated).
     * @return bool False when the status is not 2xx, the connection is no
     *              longer readable, or the file cannot be written.
     */
    function SaveToBin($savefilename)
    {
        if(!$this->IsGetOK()) return false;
        if(!is_resource($this->m_fp) || feof($this->m_fp)){
            $this->m_error = "连接已经关闭!";
            return false;
        }

        $fp = @fopen($savefilename,"wb");
        if(!$fp){
            $this->m_error = "无法写入文件:".$savefilename;
            return false;
        }

        $ok = true;
        while(!feof($this->m_fp)){
            $chunk = fread($this->m_fp,8192);
            if($chunk===false) break;
            if($chunk!="" && fwrite($fp,$chunk)===false){
                $this->m_error = "无法写入文件:".$savefilename;
                $ok = false;
                break;
            }
        }
        fclose($fp);
        $this->Close();
        return $ok;
    }

    /**
     * Save the response body to a file, but only when it is a text response.
     *
     * @param string $savefilename Destination path.
     * @return bool Result of SaveToBin(), or false when the response is not text.
     */
    function SaveToText($savefilename)
    {
        if(!$this->IsText()) return false;
        return $this->SaveToBin($savefilename);
    }

    /**
     * Return the body of a text response.
     *
     * The body is read from the socket on the first call and cached in
     * $m_html; the connection is closed once it has been read.
     *
     * @return string Body text, or "" when the response is not text or there
     *                is nothing left to read.
     */
    function GetHtml()
    {
        if(!$this->IsText()) return "";
        if($this->m_html!="") return $this->m_html;
        if(!is_resource($this->m_fp) || feof($this->m_fp)) return "";

        while(!feof($this->m_fp)){
            $chunk = fread($this->m_fp,8192);
            if($chunk===false) break;
            $this->m_html .= $chunk;
        }
        $this->Close();
        return $this->m_html;
    }

    /**
     * Connect, send the GET request, read the response headers and follow a
     * redirect if the server answers 3xx.
     *
     * @return bool True when response headers were read, false otherwise.
     */
    function PrivateStartSession()
    {
        if(!$this->PrivateOpenHost()){
            $this->m_error .= "打开远程主机出错!";
            return false;
        }
        if(!$this->PrivateSendRequest()) return false;
        if(!$this->PrivateReadHead()) return false;

        // Anything but a 3xx answer is final.
        if(strncmp($this->m_httphead["http-state"],"3",1)!=0) return true;

        if($this->JumpCount > 3){
            $this->m_error = "重定向次数过多!";
            return false;
        }
        if(!isset($this->m_httphead["location"])){
            $this->m_error = "无法识别的转移应答!";
            return true;
        }

        $newurl = $this->m_httphead["location"];
        // Absolute targets are used as they are; everything else is resolved
        // against the URL that was just requested.
        if(!preg_match('#^https?://#i',$newurl)) $newurl = $this->FillUrl($newurl);
        if($newurl==""){
            $this->m_error = "无法识别的转移应答!";
            return true;
        }
        return $this->JumpOpenUrl($newurl);
    }

    /**
     * Write the GET request line and headers to the open socket.
     *
     * Headers from SetHead() are sent together with defaults for Accept,
     * User-Agent and Referer. The defaults are applied to a per-request copy,
     * so values derived from one host never leak into a later request.
     *
     * @return bool False when the request could not be written.
     */
    function PrivateSendRequest()
    {
        $scheme = $this->PrivateScheme();

        // The Host header carries the port only when it is not the default one.
        $hostHead = $this->m_host;
        $defaultPort = ($scheme=="https") ? 443 : 80;
        if((int)$this->m_port != $defaultPort) $hostHead .= ":".$this->m_port;

        $heads = is_array($this->m_puthead) ? $this->m_puthead : array();

        // Earlier versions of this class spelled the header "Refer"; accept
        // that spelling from callers but send the real header name.
        if(isset($heads["Refer"])){
            if(!isset($heads["Referer"])) $heads["Referer"] = $heads["Refer"];
            unset($heads["Refer"]);
        }

        $heads["Host"] = $hostHead;
        if(!isset($heads["Accept"])) $heads["Accept"] = "*/*";
        if(!isset($heads["User-Agent"])) $heads["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)";
        if(!isset($heads["Referer"])) $heads["Referer"] = $scheme."://".$hostHead;
        if($this->m_user!="" && !isset($heads["Authorization"])){
            $heads["Authorization"] = "Basic ".base64_encode($this->m_user.":".$this->m_pass);
        }
        // The body is read until EOF, so the server has to close the connection.
        $heads["Connection"] = "close";

        // HTTP/1.0 is requested on purpose: the readers in this class do not
        // decode chunked transfer encoding, which an HTTP/1.1 server may use.
        $request = "GET ".$this->m_urlpath." HTTP/1.0\r\n";
        $strip = array("\r","\n");
        foreach($heads as $k=>$v){
            // Line breaks inside a name or value would inject extra header lines.
            $k = trim(str_replace($strip,"",$k));
            $v = trim(str_replace($strip,"",$v));
            if($k!="" && $v!="") $request .= "$k: $v\r\n";
        }
        $request .= "\r\n";

        if(fputs($this->m_fp,$request)===false){
            $this->m_error = "发送请求失败!";
            $this->Close();
            return false;
        }
        return true;
    }

    /**
     * Read the status line and the response headers into $m_httphead.
     *
     * @return bool False when the server closed the connection without
     *              sending a status line.
     */
    function PrivateReadHead()
    {
        $statusLine = fgets($this->m_fp);
        if($statusLine===false){
            $this->m_error = "远程主机没有返回应答!";
            $this->Close();
            return false;
        }

        // "HTTP/1.1 404 Not Found" -> version, status code, reason phrase.
        $httpstas = explode(" ",trim($statusLine),3);
        $this->m_httphead["http-edition"] = $httpstas[0];
        $this->m_httphead["http-state"] = isset($httpstas[1]) ? trim($httpstas[1]) : "";
        $this->m_httphead["http-describe"] = isset($httpstas[2]) ? trim($httpstas[2]) : "";

        while(!feof($this->m_fp)){
            // No length limit: a capped read would split a long header line
            // and turn its tail into a bogus extra header.
            $line = fgets($this->m_fp);
            if($line===false) break;
            $line = trim($line);
            if($line=="") break;   // blank line ends the header section

            $colon = strpos($line,":");
            if($colon===false){
                $hkey = $line;
                $hvalue = "";
            }
            else{
                $hkey = trim(substr($line,0,$colon));
                $hvalue = trim(substr($line,$colon+1));
            }
            if($hkey!="") $this->m_httphead[strtolower($hkey)] = $hvalue;
        }
        return true;
    }

    /**
     * Return the value of a response header.
     *
     * @param string $headname Header name, case-insensitive.
     * @return string Header value, or "" when the header is absent.
     */
    function GetHead($headname)
    {
        $headname = strtolower($headname);
        if(is_array($this->m_httphead) && isset($this->m_httphead[$headname])){
            return $this->m_httphead[$headname];
        }
        return "";
    }

    /**
     * Set a request header. Must be called before OpenUrl().
     *
     * @param string $skey   Header name.
     * @param string $svalue Header value.
     */
    function SetHead($skey,$svalue)
    {
        if(!is_array($this->m_puthead)) $this->m_puthead = array();
        $this->m_puthead[$skey] = $svalue;
    }

    /**
     * Open the socket to the current host and port (10 second connect timeout).
     * https URLs are opened through the ssl:// transport.
     *
     * @return bool False when no host is set or the connection fails; the
     *              failure reason is stored in $m_error.
     */
    function PrivateOpenHost()
    {
        if($this->m_host=="") return false;

        $target = $this->m_host;
        if($this->PrivateScheme()=="https") $target = "ssl://".$target;

        $errno = 0;
        $errstr = "";
        // The warning is suppressed because the reason is reported via $errstr.
        $this->m_fp = @fsockopen($target,(int)$this->m_port,$errno,$errstr,10);
        if(!$this->m_fp){
            $this->m_fp = false;
            $this->m_error = $errstr;
            return false;
        }
        return true;
    }

    /**
     * Close the connection if it is open. Safe to call repeatedly.
     */
    function Close()
    {
        if(is_resource($this->m_fp)) fclose($this->m_fp);
        $this->m_fp = false;
    }

    /**
     * Turn a link found on the current page into an absolute URL.
     *
     * Resolution uses $HomeUrl / $BaseUrlPath, i.e. the URL most recently
     * passed to PrivateInit(). A "#fragment" is dropped unless the link
     * consists of the fragment only. Runs of "/" in the path are collapsed;
     * the query string is left untouched.
     *
     * @param string $surl Absolute, protocol-relative, root-relative or
     *                     relative link.
     * @return string Absolute URL, or "" when $surl is empty, is just ".",
     *                "./" or "..", or climbs above the host with "..".
     */
    function FillUrl($surl)
    {
        $surl = trim($surl);
        if($surl=="") return "";

        $pos = strpos($surl,"#");
        if($pos>0) $surl = substr($surl,0,$pos);

        $scheme = $this->PrivateScheme();

        if(strncasecmp($surl,"http://",7)==0){
            $scheme = "http";
            $okurl = substr($surl,7);
        }
        else if(strncasecmp($surl,"https://",8)==0){
            $scheme = "https";
            $okurl = substr($surl,8);
        }
        else if(substr($surl,0,2)=="//"){
            // Protocol-relative link: same scheme, different host.
            $okurl = substr($surl,2);
        }
        else if($surl[0]=="/"){
            $okurl = $this->HomeUrl.$surl;
        }
        else if($surl[0]=="."){
            if(strlen($surl)<=2) return "";

            if($surl[1]=="/"){
                // "./file" lives in the current directory.
                $okurl = $this->BaseUrlPath."/".substr($surl,2);
            }
            else{
                // Count ".." segments and keep the rest of the link.
                $pathStep = 0;
                $dstr = "";
                $parts = explode("/",$surl);
                $last = count($parts)-1;
                foreach($parts as $i=>$part){
                    if($part=="..") $pathStep++;
                    else if($i<$last) $dstr .= $part."/";
                    else $dstr .= $part;
                }

                // Drop one trailing segment of the base per "..". The first
                // segment is the host, which must never be dropped.
                $baseParts = explode("/",$this->BaseUrlPath);
                if(count($baseParts) <= $pathStep) return "";
                $kept = array_slice($baseParts,0,count($baseParts)-$pathStep);
                $okurl = implode("/",$kept)."/".$dstr;
            }
        }
        else{
            $okurl = $this->BaseUrlPath."/".$surl;
        }

        if($okurl=="") return "";

        // Collapse duplicate slashes in the path only; a query string may
        // legitimately contain "//" (e.g. an embedded URL).
        $query = "";
        $qpos = strpos($okurl,"?");
        if($qpos!==false){
            $query = substr($okurl,$qpos);
            $okurl = substr($okurl,0,$qpos);
        }
        $okurl = preg_replace('#/{2,}#',"/",$okurl);

        return $scheme."://".$okurl.$query;
    }
}

?>


有用  |  无用

猜你喜欢