argument('type');
        $count = explode('-',$type);
        // NOTE(review): $start/$end are only assigned when $type has exactly three
        // '-'-separated parts, but such a value can never equal 'detail' below, so the
        // 'detail' branch appears to run only with $start/$end undefined. The start of
        // this method is outside the visible chunk, so the code is left untouched here.
        if(is_array($count) && count($count) == 3){
            $start = $count[1];
            $end = $count[2];
        }
        if($type == 'list'){
            self::getList();
        } else if($type == 'detail'){
            self::getDetail($start,$end);
        } else if($type == 'findlose'){
            self::getloseinfo();
        }
    }

    /**
     * Build the options array passed as the third argument of every QueryList::get()
     * call in this class, so the request headers are defined in one place.
     *
     * @return array Options holding the Referer, User-Agent and Accept-Encoding headers.
     */
    private static function crawlRequestOptions()
    {
        return [
            'headers' => [
                'Referer' => 'http://www.baidu.com',
                'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
                'Accept-Encoding' => 'gzip, deflate, br',
            ],
        ];
    }

    /**
     * Persist the crawl checkpoint for one checkpoint type.
     *
     * Existence is checked against the database on every call instead of relying on
     * a row fetched before the crawl started; otherwise a missing row would be
     * inserted again on every iteration.
     *
     * @param int   $type   Value of the `type` column identifying the checkpoint.
     * @param mixed $lastId Id of the Links row to store in `last_id`.
     * @return void
     */
    private static function saveCheckpoint($type, $lastId)
    {
        if (Last::where(['type' => $type])->exists()) {
            Last::where(['type' => $type])->update(['last_id' => $lastId]);
            return;
        }

        Last::insert(['type' => $type, 'last_id' => $lastId]);
    }

    /**
     * Crawl the paginated listing pages of every type-1 link and store each
     * discovered detail URL as a type-2 Links row.
     *
     * Progress is checkpointed in the Last row with type 1, which is the same row
     * this method reads when resuming. Ends by calling dd('end'), which terminates
     * the process.
     *
     * @return void
     */
    public static function getList()
    {
        $last = Last::where(['type' => 1])->first();

        $query = Links::where(['type' => 1]);
        if (!empty($last) && (int) $last->last_id !== 0) {
            // Resume from the last listing link that was being processed.
            $query->where('id', '>=', $last->last_id);
        }

        // `id` has to be selected as well: it is written to the checkpoint below.
        // Ordering by id keeps the ">= last_id" resume condition meaningful.
        $communites = $query->orderBy('id')->get(['id', 'links']);

        foreach ($communites as $communite) {
            // Checkpoint under type 1; type 2 is the checkpoint getDetail() uses
            // for the id range ending at 5000.
            self::saveCheckpoint(1, $communite->id);

            for ($i = 1; $i < 100; $i++) {
                $pageUrl = self::DOMAIN.$communite->links.'pg'.$i;

                $areas = QueryList::get($pageUrl, null, self::crawlRequestOptions())
                    ->find('.xiaoquListItem .title a')
                    ->attrs('href');

                if (empty($areas->toArray())) {
                    // No result links on this page: stop paging this listing.
                    Log::info('失效页数'.$pageUrl);
                    break;
                }

                foreach ($areas as $area) {
                    // Skip detail links that are already stored.
                    $is_have = Links::where(['links' => $area])->value('id');
                    if ($is_have) {
                        continue;
                    }
                    Links::insert(['links' => $area, 'type' => 2]);
                }

                // Random pause between page requests.
                sleep(rand(2,6));
            }
        }

        dd('end');
    }

    /**
     * Crawl the detail page of every type-2 link whose id lies in [$start, $end]
     * and insert one BuildInfo row per page.
     *
     * Only three values of $end are accepted (5000, 10000, 15200); each maps to its
     * own checkpoint type (2, 3, 4). When a checkpoint row with a non-zero last_id
     * exists, it replaces $start. Any other $end, or $start > $end, stops the
     * process through dd(). Ends by calling dd('end').
     *
     * @param int|string $start First Links id to crawl.
     * @param int|string $end   Last Links id to crawl; selects the checkpoint type.
     * @return void
     */
    public static function getDetail($start=0,$end=0)
    {
        $rules = [
            // Estate name
            'build_name' => ['.detailTitle','text'],
            // District
            'district' => ['.l-txt a:eq(2)','text'],
            // Community
            'community' => ['.l-txt a:eq(3)','text'],
            // Brief address
            'sample_address' => ['.detailDesc','text'],
            // Estate unit price
            'price'=>['.xiaoquUnitPrice','text'],
            // Estate images
            'images' => ['.imgThumbnailList img','src'],
            // Year of completion
            'completed' => ['.xiaoquInfo .xiaoquInfoItem:eq(0) span:eq(1)','text'],
            // Building structure type
            'structure_type' => ['.xiaoquInfo .xiaoquInfoItem:eq(1) span:eq(1)','text'],
            // Property management company
            'tenement' => ['.xiaoquInfo .xiaoquInfoItem:eq(3) span:eq(1)','text'],
            // Number of buildings
            'bulid_num' => ['.xiaoquInfo .xiaoquInfoItem:eq(5) span:eq(1)','text'],
            // Number of households
            'households' => ['.xiaoquInfo .xiaoquInfoItem:eq(6) span:eq(1)','text'],
        ];

        if ($start > $end) {
            dd('最后的数必须大于前面的数字');
        }

        // Each supported upper bound has its own checkpoint type.
        if ($end == 5000) {
            $type = 2;
        } else if ($end == 10000) {
            $type = 3;
        } else if ($end == 15200) {
            $type = 4;
        } else {
            dd('位置类型');
        }

        $last = Last::where(['type' => $type])->first();
        if (!empty($last) && (int) $last->last_id !== 0) {
            // Resume from the last link that was stored successfully.
            $start = $last->last_id;
        }

        Links::whereBetween('id', [$start, $end])
            ->where(['type' => 2])
            ->chunk(100, function ($links) use ($rules, $type) {
                try {
                    foreach ($links as $link) {
                        echo '爬取详情链接'.$link->links.PHP_EOL;

                        $html = QueryList::get($link->links, null, self::crawlRequestOptions());
                        $build = $html->rules($rules)->query()->getData();
                        $images = $html->find('.imgThumbnailList img')->attrs('src');

                        // Random pause between detail requests.
                        sleep(rand(3,10));

                        // NOTE(review): $images looks like a collection object (the same
                        // attrs() result is used with ->toArray() in getList()); empty()
                        // on an object is always false, so this branch always runs.
                        // Kept as-is — confirm the intent.
                        if (!empty($images)) {
                            $build['images'] = json_encode($images);
                        }
                        if (empty($build->toArray())) {
                            continue;
                        }

                        // Normalise numeric fields; missing values become 0.
                        $build['bulid_num'] = !empty($build['bulid_num']) ? intval($build['bulid_num']) : 0;
                        $build['completed'] = !empty($build['completed']) ? intval($build['completed']) : 0;
                        $build['price'] = !empty($build['price']) ? intval($build['price']) : 0;

                        // Pages without a household count are skipped.
                        if (empty($build['households'])) {
                            continue;
                        }
                        $build['households'] = intval($build['households']);

                        // Skip estates already stored under the same name.
                        $is_have = BuildInfo::where(['build_name' => $build['build_name']])->value('id');
                        if ($is_have) {
                            continue;
                        }

                        self::saveCheckpoint($type, $link->id);

                        $build['link_id'] = $link->id;
                        BuildInfo::insert($build->toArray());
                    }
                } catch (\Exception $e) {
                    // Fully qualified so the catch also matches inside a namespace.
                    Log::info('爬取详情链接'.$link->links.PHP_EOL.'报错');
                    dd($e->getMessage());
                }
            });

        dd('end');
    }

    /**
     * Re-crawl the unit price for every BuildInfo row whose price is NULL.
     *
     * Uses chunkById() because the callback updates `price`, the very column the
     * query filters on; offset-based chunk() would skip rows as updated rows drop
     * out of the result set.
     *
     * @return void
     */
    public function getloseinfo(){
        BuildInfo::where('price','=',null)->chunkById(100, function ($builds) {
            // URL being processed, kept for the error message in the catch block.
            $currentUrl = '';
            try {
                foreach ($builds as $build) {
                    $linkInfo = Links::where(['id' => $build->link_id])->first();
                    if (empty($linkInfo)) {
                        // The source link no longer exists; nothing to crawl.
                        echo '没有链接'.$build->id;
                        continue;
                    }
                    $currentUrl = $linkInfo->links;

                    // Random pause between requests.
                    sleep(rand(3,10));

                    echo '爬取详情链接'.$currentUrl.PHP_EOL;
                    $html = QueryList::get($currentUrl, null, self::crawlRequestOptions());

                    $price = $html->find('.xiaoquUnitPrice')->text();
                    if (empty($price)) {
                        echo '没有价格'.$build->id;
                        continue;
                    }

                    BuildInfo::where(['id' => $build->id])->update(['price' => $price]);
                }
            } catch (\Exception $e) {
                dd('爬取详情链接'.$currentUrl.PHP_EOL.'报错'.$e->getMessage());
            }
        });
    }
}