ecshop中采集各大站商品详细页面信息

2009-09-18 17:46 来源:www.chinab4c.com 作者:ecshop专家

    很多朋友经常需要我帮他们抓取各个网站的商品信息,于是我针对几个主要的B2C网站编写了采集代码,并整合到同一个函数中。代码如下。

   <?php
/**
 * Scrape basic product details from a product page of a supported shop.
 *
 * The shop is chosen by looking for a site-specific token (e.g. "taobao.",
 * "dangdang.", ".newegg.") anywhere in $url. Every token found in $url is
 * processed, in the order the branches appear below. The page is downloaded
 * and a set of site-specific regular expressions pulls out the fields.
 *
 * Returned array:
 *  - 'msg'   : '' on success, the string 'false' when a page could not be
 *              downloaded (or was empty).
 *  - 'title', 'price', 'img', 'name', 'sp' : present only for sites whose
 *              branch extracts them; a field whose pattern did not match is
 *              null (or '' where the value is trimmed / tag-stripped).
 *
 * NOTE(review): several branches convert UTF-8 text to GBK and the patterns
 * contain non-ASCII literals, so the results presumably are meant to be GBK
 * and depend on the encoding this file is saved in -- confirm before reuse.
 *
 * NOTE(security): $url is handed straight to the stream/cURL layer. If it can
 * come from an untrusted user, validate scheme and host before calling this
 * function (local paths and internal hosts are not rejected here).
 *
 * @param string $url Product page URL.
 * @return array Extracted fields as described above.
 */
function to_info($url)
{
    $array = array();
    $array['msg'] = '';

    // taobao
    if (strpos($url, "taobao.") !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<img id=\"J_ImgBooth\"\s*src=\"(.*?)\"/i", $string, $img);
            preg_match("/<li>价格:<em>(.*?)<\/em>元<\/li>/i", $string, $price);
            preg_match("/<div\s*class=\"detail-hd\">(.*?)<\/div>/is", $string, $title);
            preg_match("/<a\s*class=\"hCard fn\"[^>]*>(.*?)<\/a>/", $string, $name);
            $array['img'] = to_info_group($img, 1);
            $array['price'] = to_info_group($price, 1);
            $array['title'] = trim(strip_tags(to_info_group($title, 1, '')));
            $array['name'] = trim(to_info_group($name, 1, ''));
        } else {
            $array['msg'] = 'false';
        }
    }

    // paipai
    if (strpos($url, "paipai.") !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            // <em id="commodityCurrentPrice" defaultVal="135.00">135.00</em>
            preg_match("/<em\s*id=\"commodityCurrentPrice\"\s*[^>]*>(.*?)<\/em>/i", $string, $price);
            preg_match("/<h1>(.*?)<\/h1>/i", $string, $title);
            preg_match("/<li id=\"userNickname\">(.*?)<\/li>/is", $string, $name);
            preg_match("/<span\sid=\"shipCost2\">(.*?)<\/span>/i", $string, $shipping1);

            if (!empty($shipping1[1])) {
                // A failed conversion is treated as "no shipping information".
                $shippingText = iconv('gbk', 'gb2312', $shipping1[1]);
                if ($shippingText !== false) {
                    preg_match_all("/(\d|\.)*元\(快递\)/i", $shippingText, $shipping);
                    $express = to_info_group(to_info_group($shipping, 0, array()), 0);
                    if (!empty($express)) {
                        // Keep only the leading numeric part of the express fee.
                        preg_match("/(\d|\.)*/", $express, $sp);
                        $array['sp'] = to_info_group($sp, 0);
                    }
                }
            }

            // The seller nickname is the <strong> element inside the matched <li>.
            $name1 = strip_tags(to_info_group($name, 1, ''), "<strong>");
            preg_match("/<strong>(.*?)<\/strong>/i", $name1, $name);
            $array['price'] = to_info_group($price, 1);
            $array['name'] = to_info_group($name, 1);
            $array['title'] = trim(strip_tags(to_info_group($title, 1, '')));
        } else {
            $array['msg'] = 'false';
        }
    }

    // eachnet
    if (strpos($url, 'eachnet.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<h1>(.*?)<\/h1>/is", $string, $title);
            preg_match('/formatPrice\(\'([\d\.]*)/i', $string, $price);
            $array['title'] = trim(to_info_to_gbk(to_info_group($title, 1, '')));
            $array['price'] = to_info_group($price, 1);
        } else {
            $array['msg'] = 'false';
        }
    }

    // amazon
    if (strpos($url, 'amazon.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<h1\s*class=\"DetailTitle\">(.*?)<\/h1>/is", $string, $title);
            preg_match("/<span\s*class=\"OurPrice\">([\d\.]*)/", $string, $price);
            // The first group is kept (without the former nested "*" quantifier,
            // which could backtrack exponentially) so the URL stays in group 2.
            preg_match("/<img\s*id=\"ImageShow\"(.*?)src=\"(.*?)\"/i", to_info_to_gbk($string), $img);
            $array['title'] = trim(to_info_to_gbk(to_info_group($title, 1, '')));
            $array['price'] = to_info_group($price, 1);
            $array['img'] = to_info_group($img, 2);
        } else {
            $array['msg'] = 'false';
        }
    }

    // dangdang
    if (strpos($url, 'dangdang.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<span\s*class=\"black000\">(.*?)<\/span>/is", $string, $title);
            preg_match("/<span\s*class=\"redc30\">(.*?)<b>([\d\.]*)(.*?)<\/b><\/span>/i", $string, $price);
            preg_match("/<img\s*src=\"(.*?)\"\s*id=\"img_show_prd\"/", $string, $img);
            $array['title'] = strip_tags(to_info_group($title, 1, ''));
            $array['price'] = to_info_group($price, 2);
            $array['img'] = to_info_group($img, 1);

            if ($array['title'] === '') {
                // No title with the first layout: try the second set of patterns.
                preg_match("/<div\s*class=\"mall_goods_title_text\">(.*?)<\/div>/i", $string, $title1);
                $array['title'] = strip_tags(to_info_group($title1, 1, ''));

                preg_match("/<b\s*id=\"salePriceTag\">(.*?)<\/b>/i", $string, $price);
                if (!empty($price[1])) {
                    $array['price'] = str_replace("¥", "", $price[1]);
                }

                preg_match("/<img\s*id='largePic'\s*src=\"(.*?)\"[^>]*\/>/i", $string, $img);
                if (isset($img[1])) {
                    $array['img'] = $img[1];
                }
            }
        } else {
            $array['msg'] = 'false';
        }
    }

    // baidu
    if (strpos($url, 'baidu.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<h1>(.*?)<\/h1>/is", $string, $title);
            // <li class="uname"><a href="/shop/88fc04edf20bcd36a5ab21fa/credit" target='_blank'>3c数码城</a>
            preg_match("/<span class=\"price\">(.*?)<\/span>/", $string, $price);
            $p = array();
            if (!empty($price[1])) {
                preg_match_all('/(.*?)<small>(.*?)<\/small>/i', $price[1], $p);
            }
            preg_match("/<li\s*class=\"uname\"><a[^>]*>(.*?)<\/a>/i", $string, $name);

            // Price is the text before <small> followed by the <small> content.
            $array['price'] = to_info_group(to_info_group($p, 1, array()), 0, '')
                . to_info_group(to_info_group($p, 2, array()), 0, '');
            $array['title'] = to_info_group($title, 1);
            $array['name'] = to_info_group($name, 1);
        } else {
            $array['msg'] = 'false';
        }
    }

    // china-pub
    if (strpos($url, 'china-pub.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<h1\s*class=\"black15c\">(.*?)<\/h1>/is", $string, $title);
            preg_match("/<span\s*class=\"red\">(.*?)<\/span>/is", $string, $price);
            preg_match_all("/<img\s*src=\'(.*?)\'\s/i", $string, $img);

            if (!empty($price[1])) {
                $array['price'] = str_replace("¥", '', $price[1]);
            }
            $array['title'] = trim(strip_tags(to_info_group($title, 1, '')));
            // The second matching <img> on the page is used as the product image.
            $array['img'] = to_info_group(to_info_group($img, 1, array()), 1);
        } else {
            $array['msg'] = 'false';
        }
    }

    // 360buy
    if (strpos($url, '360buy.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<h1>(.*?)<\/h1>/i", $string, $title);
            preg_match_all("/jqimg=\"(.*?)\"/is", $string, $img);
            // The second jqimg attribute on the page is used as the product image.
            $array['img'] = to_info_group(to_info_group($img, 1, array()), 1);
            $array['title'] = to_info_group($title, 1);
        } else {
            $array['msg'] = 'false';
        }
    }

    // 139shop
    if (strpos($url, '139shop.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            // <font class='black9'>499元</font>
            preg_match("/<h1>(.*?)<\/h1>/i", $string, $title);
            preg_match("/<font\sclass='black9'>([\d\.]*)元<\/font>/i", $string, $price);
            $array['title'] = to_info_group($title, 1);
            $array['price'] = to_info_group($price, 1);
        } else {
            $array['msg'] = 'false';
        }
    }

    // m18
    if (strpos($url, 'm18.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<span\sclass=\"price\">(.*?)<\/span>/i", $string, $price);
            preg_match("/<div\s*class=\"bigpic fl\"><img\s*src=\"(.*?)\"[^>]*><\/div>/i", $string, $img);
            // No title pattern exists for this site; the key is kept (empty)
            // so the returned shape matches the other branches.
            $array['title'] = '';
            $array['price'] = to_info_group($price, 1);
            $array['img'] = to_info_group($img, 1);
        } else {
            $array['msg'] = 'false';
        }
    }

    // e-giordano
    if (strpos($url, 'e-giordano.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<span\s*id=\"ctl00_ctl00_ContentPlaceMain_ContentPlaceMain_ProductDetail1_labelproductname\"[^>]*>(.*?)<\/span>/is", $string, $title);
            preg_match("/<span\s*id=\"ctl00_ctl00_ContentPlaceMain_ContentPlaceMain_ProductDetail1_TabContainer2_TabPanel1_labNPric\"[^>]*>¥(.*?)<\/span>/i", to_info_to_gbk($string), $price);
            // onmouseover="PreviewImg(
            preg_match("/onmouseover=\"PreviewImg\(\'(.*?)\'\)\"/is", $string, $img);
            $array['title'] = trim(to_info_to_gbk(to_info_group($title, 1, '')));
            $array['price'] = to_info_group($price, 1);
            if (!empty($img[1])) {
                $comma = strpos($img[1], ',');
                // With several arguments the capture keeps everything up to the
                // character before the first comma (the same cut the original
                // made); with a single argument the whole capture is the path.
                $array['img'] = ($comma === false)
                    ? $img[1]
                    : substr($img[1], 0, $comma - 1);
            }
        } else {
            $array['msg'] = 'false';
        }
    }

    // 818shyf
    if (strpos($url, '818shyf.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<h1[^>]*>(.*?)<\/h1>/is", $string, $title);
            $array['title'] = strip_tags(to_info_group($title, 1, ''));

            preg_match("/<span\s*class=\"style1\">¥(.*?)<\/span>/i", $string, $price);
            $array['price'] = to_info_group($price, 1);

            // The image is the first <img> inside a specific table cell.
            preg_match("/<td height=\"180\" align=\"center\">(.*?)<\/td>/is", $string, $img);
            if (!empty($img[1])) {
                preg_match("/<img\s*src=\"(.*?)\"[^>]*\/>/is", $img[1], $img1);
                $array['img'] = to_info_prefixed("http://www.818shyf.com/", to_info_group($img1, 1));
            }
        } else {
            $array['msg'] = 'false';
        }
    }

    // redbaby
    if (strpos($url, 'redbaby.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<h1><img[^>]*>(.*?)<\/h1>/is", $string, $title);
            $array['title'] = trim(to_info_group($title, 1, ''));

            preg_match("/<span\sid=\"price\"\s*class=\"jiage\">(.*?)<\/span>/i", $string, $price);
            $array['price'] = to_info_group($price, 1);

            preg_match("/<img\s*class=\"curr\"\s*src=\"(.*?)\"[^>]*>/is", $string, $img);
            $array['img'] = to_info_group($img, 1);
        } else {
            $array['msg'] = 'false';
        }
    }

    // no5
    if (strpos($url, '.no5.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<td\sid=\"no5price[\d]*\_\d*\"[^>]*>(.*?)元<\/td>/i", $string, $price);
            preg_match("/<td\svalign=\"top\">(.*?)<\/td>/i", $string, $title);
            $array['title'] = to_info_group($title, 1);
            $array['price'] = to_info_group($price, 1);
            // No image is returned for this site (the original matched an <img>
            // but never used the result, so that dead match was dropped).
        } else {
            $array['msg'] = 'false';
        }
    }

    // 4inlook
    if (strpos($url, '4inlook.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<img id=\"big\"\s*src=\"(.*?)\"/i", $string, $img);
            preg_match("/<span\s*id=\"lblname\">(.*?)<\/span>/i", $string, $title);
            // <span class="font400"><span id="lblSalePrice">173</span>
            preg_match("/<span\s*class=\"font400\">(.*?)<\/span>/is", $string, $price);

            $array['title'] = to_info_group($title, 1);
            $array['img'] = to_info_prefixed("http://www.4inlook.com/", to_info_group($img, 1));
            $array['price'] = strip_tags(to_info_group($price, 1, ''));
        } else {
            $array['msg'] = 'false';
        }
    }

    // x.com.cn
    if (strpos($url, '.x.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<div\s*class=\"pro_name\">(.*?)<\/div>/i", $string, $title);
            preg_match("/<span\s*class=\"text_red jiage\">(.*?)<\/span>/i", $string, $price);
            // <img src="http://help.x.com.cn/product_img/pic_mid/N4749.jpg" alt="夜色恋人[201190]" class="pro_img" />
            $array['title'] = to_info_group($title, 1);
            if (!empty($price[1])) {
                $array['price'] = str_replace("[桔 色 价]:¥", "", $price[1]);
            }
            preg_match("/<img\s*src=\"(.*?)\"\s*alt=\"(.*?)\"\s*class=\"pro_img\"[^>]*\/>/i", $string, $img);
            $array['img'] = to_info_group($img, 1);
        } else {
            $array['msg'] = 'false';
        }
    }

    // 7shop24
    if (strpos($url, '.7shop24.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<div\s*class=\"sj_font_a\">¥(.*?)<\/div>/i", $string, $price);
            preg_match("/<div\s*align=\"center\"\s*class=\"sjname_font\"[^>]*>(.*?)<\/div>/is", $string, $title);
            preg_match("/<IMG\s*src=\"(.*?)\"\s*width=349\s*jqimg=\"(.*?)\">/i", $string, $img);
            $array['title'] = trim(to_info_group($title, 1, ''));
            $array['price'] = to_info_group($price, 1);
            $array['img'] = to_info_prefixed("http://www.7shop24.com/", to_info_group($img, 1));
        } else {
            $array['msg'] = 'false';
        }
    }

    // blemall (the original contained this branch twice, verbatim; one copy
    // is enough and avoids downloading the page a second time)
    if (strpos($url, '.blemall.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            // id="f_pic" style="background:url(http://services.blemall.com/pic_show.php?...);cursor:pointer;"
            preg_match("/<div\s*class=\"odd_right_pmtxt\"\s*style=\"color:#023bbd\">(.*?)<\/div>/i", $string, $title);
            preg_match("/id=\"f_pic\"\s*style=\"background:url\((.*?)\);cursor:pointer;\"/i", $string, $img);

            $array['title'] = trim(to_info_group($title, 1, ''));
            $array['img'] = to_info_group($img, 1);
        } else {
            $array['msg'] = 'false';
        }
    }

    // newegg
    if (strpos($url, '.newegg.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            preg_match("/<h1\s*[^>]*>(.*?)<\/h1>/i", $string, $title);
            preg_match("/<a id=\"bigImg\" href=\"(.*?)\" class=\"jqzoom\"/i", $string, $img);
            $array['title'] = trim(to_info_group($title, 1, ''));
            $array['img'] = to_info_group($img, 1);
        } else {
            $array['msg'] = 'false';
        }
    }

    // vancl
    if (strpos($url, '.vancl.') !== false) {
        $string = to_info_fetch($url);
        if ($string !== false) {
            // <span id="SellPrice" style="color:#333333;">89</span>
            preg_match("/<h2\s*[^>]*>(.*?)<\/h2>/is", $string, $title);
            preg_match("/<a\s*href=\"(.*?)\"\s*class=\"jqzoom\"/i", $string, $img);
            preg_match("/<span\sid=\"SellPrice\"\s*style=\"color:#333333;\">(.*?)<\/span>/i", $string, $price);
            $array['title'] = trim(to_info_to_gbk(to_info_group($title, 1, '')));
            $array['img'] = to_info_group($img, 1);
            $array['price'] = to_info_group($price, 1);
        } else {
            $array['msg'] = 'false';
        }
    }

    // wangshanghai (fetched through cURL so a User-Agent header can be sent)
    if (strpos($url, '.wangshanghai.') !== false) {
        $str = to_info_fetch_curl($url);
        if ($str !== false) {
            preg_match("/<td\s*class=\"titgd\"\s*colspan=\"2\"\s*valign=\"center\">(.*?)<\/td>/i", $str, $title);
            $array['title'] = trim(to_info_to_gbk(to_info_group($title, 1, '')));

            // The second red <font> element on the page carries the price.
            preg_match_all("/<font\s*color=\"#cc0000\">(.*?)<\/font>/i", $str, $price);
            $s = to_info_to_gbk(to_info_group(to_info_group($price, 1, array()), 1, ''));
            $array['price'] = str_replace("¥", "", $s);

            /* <img id='pic_02' src="/SHP00001/frontend/shopdata/SHP00001/images/prod/100117461_02.jpg"
               border="0" width="200" height="200" alt="..."> */
            preg_match("/<img\s*id=\'pic_02\'\s*src=\"(.*?)\"[^>]*>/is", $str, $img);
            $array['img'] = to_info_prefixed("http://www.wangshanghai.com", to_info_group($img, 1));
        } else {
            $array['msg'] = 'false';
        }
    }

    return $array;
}

/**
 * Download a page through PHP's stream layer.
 *
 * @param string $url Location to read.
 * @return string|false Page body, or false when the read failed or was empty.
 */
function to_info_fetch($url)
{
    $content = file_get_contents($url);
    if ($content === false || $content === '') {
        return false;
    }
    return $content;
}

/**
 * Download a page through cURL, forwarding the current request's User-Agent
 * header when one is available.
 *
 * @param string $url Location to read.
 * @return string|false Page body, or false when the request failed or was empty.
 */
function to_info_fetch_curl($url)
{
    $ch = curl_init($url);
    if ($ch === false) {
        return false;
    }
    // Return the body instead of echoing it, so no output buffering is needed.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    if (!empty($_SERVER['HTTP_USER_AGENT'])) {
        curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
    }
    $body = curl_exec($ch);
    curl_close($ch);

    if ($body === false || $body === '') {
        return false;
    }
    return $body;
}

/**
 * Read one entry of a preg_match()/preg_match_all() result without triggering
 * an "undefined offset" notice when the pattern (or group) did not match.
 *
 * @param mixed      $matches Match array (anything else yields $default).
 * @param int|string $index   Entry to read.
 * @param mixed      $default Value returned when the entry is missing.
 * @return mixed
 */
function to_info_group($matches, $index, $default = null)
{
    if (is_array($matches) && isset($matches[$index])) {
        return $matches[$index];
    }
    return $default;
}

/**
 * Convert UTF-8 text to GBK; returns '' for empty input or a failed conversion.
 *
 * @param string|null $text UTF-8 input.
 * @return string
 */
function to_info_to_gbk($text)
{
    if ($text === null || $text === '') {
        return '';
    }
    $converted = iconv("utf-8", "gbk", $text);
    return ($converted === false) ? '' : $converted;
}

/**
 * Prepend a site prefix to a scraped relative path.
 *
 * @param string      $prefix Site prefix, e.g. "http://www.example.com/".
 * @param string|null $path   Scraped path, or null/'' when nothing matched.
 * @return string|null Prefixed URL, or null when there is no path (so that a
 *                     bare site prefix is never reported as an image URL).
 */
function to_info_prefixed($prefix, $path)
{
    if ($path === null || $path === '') {
        return null;
    }
    return $prefix . $path;
}
?>