123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217 |
- <?php
- namespace App\Console\Commands;
- use App\Model\BuildInfo;
- use App\Model\Last;
- use App\Model\Links;
- use Illuminate\Console\Command;
- use Illuminate\Support\Facades\Log;
- use QL\QueryList;
/**
 * Artisan command that crawls community listings and detail pages from
 * cd.lianjia.com and stores them through the Links / BuildInfo / Last models.
 *
 * Supported values of the "type" argument (see handle()):
 *  - "list"                 crawl listing pages and collect detail links
 *  - "detail-<start>-<end>" crawl detail pages for Links ids in [start, end]
 *  - "findlose"             re-crawl the price for BuildInfo rows with no price
 */
class Crawle extends Command
{
    /**
     * The name and signature of the console command.
     *
     * NOTE(review): Laravel signatures normally separate the command name from
     * its arguments with a space (e.g. "crawle {type}"). As written, the command
     * name appears to be the literal string "crawle:{type}". It is left unchanged
     * here so existing invocations keep working; confirm before renaming.
     *
     * @var string
     */
    protected $signature = 'crawle:{type}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Command description';

    /** Base URL prepended to the relative listing links stored in Links. */
    const DOMAIN = 'https://cd.lianjia.com';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * The "type" argument is either a bare action name ("list", "detail",
     * "findlose") or an action with an id range in the form
     * "<action>-<start>-<end>". The range is only meaningful for "detail".
     *
     * @return mixed
     */
    public function handle()
    {
        $type = $this->argument('type');

        $action = $type;
        $start = 0;
        $end = 0;

        // "detail-1-5000" => action "detail", start "1", end "5000".
        // Previously the whole string was compared against 'detail', so a
        // ranged argument never matched any branch.
        $segments = explode('-', (string) $type);
        if (count($segments) === 3) {
            list($action, $start, $end) = $segments;
        }

        if ($action === 'list') {
            self::getList();
        } elseif ($action === 'detail') {
            self::getDetail($start, $end);
        } elseif ($action === 'findlose') {
            $this->getloseinfo();
        }
    }

    /**
     * Crawl the paginated listing pages of every Links row with type 1 and
     * store each newly discovered detail link as a Links row with type 2.
     *
     * Progress (the id of the Links row being processed) is kept in the Last
     * row with type 1, and a later run resumes from that id.
     *
     * @return void
     */
    public static function getList()
    {
        // Progress slot for the list crawl. Types 2-4 are used by getDetail().
        $progressType = 1;

        $last = Last::where(['type' => $progressType])->first();

        $communites = Links::where(['type' => 1]);
        if (!empty($last) && (int) $last->last_id !== 0) {
            $communites->where('id', '>=', $last->last_id);
        }

        // "id" must be selected as well: it is what gets stored as progress.
        // Ordering by id makes resuming from last_id meaningful.
        $communites = $communites->orderBy('id')->get(['id', 'links']);

        foreach ($communites as $communite) {
            self::saveProgress($progressType, $communite->id);

            // Pages 1..99; stop at the first page that yields no links.
            for ($page = 1; $page < 100; $page++) {
                $url = self::DOMAIN . $communite->links . 'pg' . $page;

                $areas = QueryList::get($url, null, self::requestOptions())
                    ->find('.xiaoquListItem .title a')
                    ->attrs('href');

                if (empty($areas->toArray())) {
                    Log::info('失效页数' . $url);
                    break;
                }

                foreach ($areas as $area) {
                    // Skip links that are already stored.
                    if (Links::where(['links' => $area])->value('id')) {
                        continue;
                    }
                    Links::insert(['links' => $area, 'type' => 2]);
                }

                // Random pause between page requests.
                sleep(rand(2, 6));
            }
        }

        // Plain output instead of dd(): dd() terminates the process with a
        // failure exit status even though the crawl finished normally.
        echo 'end' . PHP_EOL;
    }

    /**
     * Crawl the detail page of every Links row with type 2 whose id lies in
     * [$start, $end] and insert the extracted fields into BuildInfo.
     *
     * The progress slot in Last is chosen from $end: 5000 => type 2,
     * 10000 => type 3, 15200 => type 4. Any other $end aborts. When a non-zero
     * progress value exists for that slot it replaces $start.
     *
     * @param int|string $start First Links id to crawl.
     * @param int|string $end   Last Links id to crawl; also selects the progress slot.
     *
     * @return void
     */
    public static function getDetail($start = 0, $end = 0)
    {
        // Field name => [CSS selector, attribute to read].
        $rules = [
            // Estate name
            'build_name' => ['.detailTitle', 'text'],
            // City district
            'district' => ['.l-txt a:eq(2)', 'text'],
            // Community
            'community' => ['.l-txt a:eq(3)', 'text'],
            // Short address
            'sample_address' => ['.detailDesc', 'text'],
            // Community unit price
            'price' => ['.xiaoquUnitPrice', 'text'],
            // Community images
            'images' => ['.imgThumbnailList img', 'src'],
            // Year of completion
            'completed' => ['.xiaoquInfo .xiaoquInfoItem:eq(0) span:eq(1)', 'text'],
            // Building structure
            'structure_type' => ['.xiaoquInfo .xiaoquInfoItem:eq(1) span:eq(1)', 'text'],
            // Property management company
            'tenement' => ['.xiaoquInfo .xiaoquInfoItem:eq(3) span:eq(1)', 'text'],
            // Number of buildings
            'bulid_num' => ['.xiaoquInfo .xiaoquInfoItem:eq(5) span:eq(1)', 'text'],
            // Number of households
            'households' => ['.xiaoquInfo .xiaoquInfoItem:eq(6) span:eq(1)', 'text'],
        ];

        // Values arrive as strings from the command line.
        $start = (int) $start;
        $end = (int) $end;

        if ($start > $end) {
            dd('最后的数必须大于前面的数字');
        }

        if ($end === 5000) {
            $type = 2;
        } elseif ($end === 10000) {
            $type = 3;
        } elseif ($end === 15200) {
            $type = 4;
        } else {
            dd('位置类型');
        }

        // Resume from the stored progress for this slot, if any.
        $last = Last::where(['type' => $type])->first();
        if (!empty($last) && (int) $last->last_id !== 0) {
            $start = (int) $last->last_id;
        }

        Links::whereBetween('id', [$start, $end])
            ->where(['type' => 2])
            ->chunk(100, function ($links) use ($rules, $type) {
                // Defined up front so the catch block can always reference it.
                $link = null;

                try {
                    foreach ($links as $link) {
                        echo '爬取详情链接' . $link->links . PHP_EOL;

                        $html = QueryList::get($link->links, null, self::requestOptions());

                        // NOTE(review): the code below treats the result as a single
                        // array-accessible collection keyed by rule name; confirm this
                        // matches the installed QueryList version.
                        $build = $html->rules($rules)->query()->getData();
                        $images = $html->find('.imgThumbnailList img')->attrs('src');

                        // Random pause between detail requests.
                        sleep(rand(3, 10));

                        // NOTE(review): attrs() appears to return a collection object
                        // (getList() calls ->toArray() on it), so this check is
                        // presumably always true; confirm.
                        if (!empty($images)) {
                            $build['images'] = json_encode($images);
                        }

                        if (empty($build->toArray())) {
                            continue;
                        }

                        // Numeric fields default to 0 when missing.
                        foreach (['bulid_num', 'completed', 'price'] as $field) {
                            $build[$field] = !empty($build[$field]) ? intval($build[$field]) : 0;
                        }

                        // Pages without a household count are skipped entirely.
                        if (empty($build['households'])) {
                            continue;
                        }
                        $build['households'] = intval($build['households']);

                        // Skip estates that are already stored under the same name.
                        if (BuildInfo::where(['build_name' => $build['build_name']])->value('id')) {
                            continue;
                        }

                        self::saveProgress($type, $link->id);

                        $build['link_id'] = $link->id;
                        BuildInfo::insert($build->toArray());
                    }
                } catch (\Exception $e) {
                    // Fully qualified: an unqualified "Exception" inside this namespace
                    // refers to a class that is not imported, so it never matched.
                    Log::info('爬取详情链接' . ($link ? $link->links : '') . PHP_EOL . '报错');
                    dd($e->getMessage());
                }
            });

        // Plain output instead of dd(): see getList().
        echo 'end' . PHP_EOL;
    }

    /**
     * Re-crawl the unit price for every BuildInfo row whose price is null and
     * store the scraped text in that row's price column.
     *
     * Rows whose Links record is missing, or whose page has no price element,
     * are skipped.
     *
     * @return void
     */
    public function getloseinfo()
    {
        // chunkById() instead of chunk(): the callback updates "price", the very
        // column being filtered on, which makes offset-based chunking skip rows.
        BuildInfo::where('price', '=', null)->chunkById(100, function ($builds) {
            // Defined up front so the catch block can always reference it.
            $url = '';

            try {
                foreach ($builds as $build) {
                    // Random pause between detail requests.
                    sleep(rand(3, 10));

                    $linkInfo = Links::where(['id' => $build->link_id])->first();
                    if (empty($linkInfo)) {
                        Log::info('Missing link record for build ' . $build->id);
                        continue;
                    }

                    $url = $linkInfo->links;
                    echo '爬取详情链接' . $url . PHP_EOL;

                    $html = QueryList::get($url, null, self::requestOptions());
                    $price = $html->find('.xiaoquUnitPrice')->text();

                    if (empty($price)) {
                        echo '没有价格' . $build->id;
                        continue;
                    }

                    BuildInfo::where(['id' => $build->id])->update(['price' => $price]);
                }
            } catch (\Exception $e) {
                dd('爬取详情链接' . $url . PHP_EOL . '报错' . $e->getMessage());
            }
        });
    }

    /**
     * Request options (HTTP headers) shared by every QueryList::get() call.
     *
     * @return array
     */
    private static function requestOptions()
    {
        return [
            'headers' => [
                'Referer' => 'http://www.baidu.com',
                'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
                'Accept-Encoding' => 'gzip, deflate, br',
            ],
        ];
    }

    /**
     * Store $linkId as the progress value of the Last row with the given type,
     * creating that row when it does not exist yet.
     *
     * Existence is checked on every call so that repeated calls within one run
     * update the same row rather than inserting a new one each time.
     *
     * @param int   $type   Progress slot in the Last table.
     * @param mixed $linkId Id of the Links row being processed.
     *
     * @return void
     */
    private static function saveProgress($type, $linkId)
    {
        if (Last::where(['type' => $type])->exists()) {
            Last::where(['type' => $type])->update(['last_id' => $linkId]);

            return;
        }

        Last::insert(['type' => $type, 'last_id' => $linkId]);
    }
}
|