Skip to content

Commit

Permalink
add dir tool
Browse files Browse the repository at this point in the history
  • Loading branch information
lxbccc committed Jul 10, 2019
1 parent 647bae5 commit 9f10371
Show file tree
Hide file tree
Showing 14 changed files with 850 additions and 636 deletions.
23 changes: 0 additions & 23 deletions app/Console/Commands/BaseCrawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,10 @@ public function binsearch_end($start_lower=1){

$res=$this->spclient->get($this->spbase_url.$end_36 )->getStatusCode();
}while($res==200);

// if ($res!=404) {
// echo $this->spbase_url.$end_36 ;
// die('需要开启代理 http://blog.csdn.net/hitxiaya/article/details/25233087 sslocal -s us2.fogip.pw -p 50312 -k "666666" -l 1081 -t 600 -m rc4-md5');
// }


$high=base_convert($end_36,36,10);
$middle = 1;
echo '向上查找 404 结果:'.$end_36." [{$high}]\n";//die;
Expand Down Expand Up @@ -242,31 +239,24 @@ public function prepare_sprequests(){
public $arr_req_code_36 =[];
public function prepare_sprequests_update($notwith404=0,$start_from=1,$refind_code_36 = 1){
$this->arr_req_code_36 = [];

$requrl = $this->spbase_url.'a';
$this->info("GET: $requrl before sql");
$response = $this->spclient->get($requrl);
$code = $response->getStatusCode();
$type = $response->getHeader('content-type');
$parsed = \GuzzleHttp\Psr7\parse_header($type);
$this->spcharset = isset($parsed[0]['charset']) ?$parsed[0]['charset']: 'UTF-8';

if($code==200 || $code==404 ){
}else{
$this->error( __METHOD__ .":[$requrl |====链接无效]") ;
die;
}

$this->save_data($response);



$istart = $start_from==0?base_convert(file_get_contents($this->file_code_36),36,10):$start_from;
$end_code_36 = $refind_code_36 ==1?$this->binsearch_end($istart):file_get_contents($this->file_code_36);

$this->info("准备数据:{$this->start_type} $end_code_36");
$total = base_convert($end_code_36,36,10);

if($total - $istart < 1 ){
$this->info("{$this->start_type} 没有更新");
return false;
Expand All @@ -275,8 +265,6 @@ public function prepare_sprequests_update($notwith404=0,$start_from=1,$refind_co
$this->info($sql);
$table_code_36 = $this->database->query($sql)->fetchAll(\PDO::FETCH_COLUMN, 0);



$all_code_36 = [];
$len = base_convert($end_code_36,36,10);
for ($i = 1; $i <=$len; $i++) {
Expand All @@ -299,8 +287,6 @@ public function prepare_sprequests_update($notwith404=0,$start_from=1,$refind_co
}
$this->info("{$this->start_type} 升级数量: $total");



$requests = function ($total) {
foreach ($this->arr_req_code_36 as $key=> $item) {
$uri =$this->spbase_url.$item;
Expand All @@ -309,15 +295,6 @@ public function prepare_sprequests_update($notwith404=0,$start_from=1,$refind_co
yield new Request('GET', $uri );
}
};

// $requests = function ($istart,$total) {
// for ($i = $istart; $i < $total; $i++) {
// $uri =$this->spbase_url.base_convert($i,10,36);
// echo "[=($i)-($total)=]";
// yield new Request('GET', $uri );
// }
// };

$this->sprequests = $requests($total);
return true;
}
Expand Down
76 changes: 19 additions & 57 deletions app/Console/Commands/JavbusCrawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class JavbusCrawler extends BaseCrawler
*
* @var string
*/
protected $signature = 'avbook:javbus {--movieid=} {--magnetid=} {--moviemax=300} {--movie404=1} {--movie=} {--page=} {--magpage=} ';
protected $signature = 'avbook:javbus {--movieid=} {--magnetid=} {--moviemax=120} {--movie404=1} {--movie=} {--page=} {--magpage=} {--genre=}';

/**
* The console command description.
Expand Down Expand Up @@ -123,7 +123,7 @@ public function handle_page($gid)
}

}
public function handle_all_page($pagenum = 10){
public function handle_all_page($pagenum = 10,$genre = ''){
$cf =\App\Tools\CrawlerUpdate::get_crawler_config();
$this->sphost = $cf['javbushost'];
$this->hosturl = "https://{$this->sphost}/";
Expand All @@ -141,11 +141,10 @@ public function handle_all_page($pagenum = 10){
"Cache-Control" =>"max-age=0"
];
$this->crawler_client_init($this->hosturl,$start_type,$this->table_prefix,$headers);
$r = $this->prepare_page_rquests("https://{$this->sphost}/page/1",$pagenum);
$r = $this->prepare_page_rquests("https://{$this->sphost}/page/1",$pagenum,$genre);
if($r){
$this->start_spider(300);
}

}
public function handle_all_magnet($pagenum = 1){
$cf =\App\Tools\CrawlerUpdate::get_crawler_config();
Expand All @@ -170,7 +169,6 @@ public function handle_all_magnet($pagenum = 1){
$this->start_spider(300);
}
}

public function handle_magnet($gid)
{

Expand Down Expand Up @@ -218,7 +216,6 @@ public function handle_magnet($gid)
*/
public function handle()
{
// var_dump($this->option('magpage2'));die;
if($this->option('movieid')){
$this->handle_movie($this->option('movieid'));
return;
Expand All @@ -227,15 +224,13 @@ public function handle()
$this->handle_magnet($this->option('magnetid'));
return;
}

//
if($this->option('movie')==1){
$movie404 = $this->option('movie404')*1;
$moviemax = $this->option('moviemax')*1;
$this->handle_all_movie($moviemax,$movie404);
}
if($this->option('page')*1 >0){
$this->handle_all_page($this->option('page')*1);
$this->handle_all_page($this->option('page')*1,$this->option('genre'));
}
if($this->option('magpage')!== null){
$this->handle_all_magnet($this->option('magpage')*1);
Expand Down Expand Up @@ -270,8 +265,8 @@ public function update_ja_code_36($table_javbus ,$table_avmoo)
echo "avmoo_code_36_null_num : {$data[0]['avmoo_code_36_null_num']} \n";

}
public function prepare_page_rquests($requrl,$pagenum=10){
$this->magnet_time = time();
public $sp_uri = '';
public function check_hosturl($requrl){
$this->info("GET: $requrl before sql");
$response = $this->spclient->get($requrl);
$code = $response->getStatusCode();
Expand All @@ -283,45 +278,38 @@ public function prepare_page_rquests($requrl,$pagenum=10){
$type = $response->getHeader('content-type');
$parsed = \GuzzleHttp\Psr7\parse_header($type);
$this->spcharset = isset($parsed[0]['charset']) ?$parsed[0]['charset']: 'UTF-8';
}
/**
 * Prepare the lazy stream of GET requests for crawling listing pages.
 *
 * Selects the URL prefix stored in $this->sp_uri (the plain "/page/" listing,
 * or "/genre/<genre>/" when a genre is supplied), records the current time in
 * $this->magnet_time, calls check_hosturl() on $requrl, then queues the page
 * numbers and stores a request generator in $this->sprequests.
 *
 * @param string $requrl  URL handed to check_hosturl() before any page is queued.
 * @param int    $pagenum Exclusive upper bound: pages 1 .. $pagenum-1 are queued.
 * @param string $genre   Optional genre path segment; an empty value selects the
 *                        default "/page/" listing.
 * @return bool Always true.
 */
public function prepare_page_rquests($requrl,$pagenum=10,$genre=''){
    // Pick the listing prefix; the page number is appended to it per request.
    $this->sp_uri = 'https://'.$this->sphost.'/page/';
    if (!empty($genre)){
        $this->sp_uri = 'https://'.$this->sphost.'/genre/'.$genre.'/';
    }

    // Timestamp taken once per run and stored on the instance.
    $this->magnet_time = time();
    $this->check_hosturl($requrl);

    // NOTE(review): the bound is exclusive, so $pagenum itself is never
    // requested — confirm this is intended and not an off-by-one.
    $this->arr_req_code_36 = [];
    for ($i = 1; $i < $pagenum; $i++) {
        $this->arr_req_code_36[]=$i;
    }
    $total = count($this->arr_req_code_36);

    $this->info("{$this->start_type} 升级数量: $total");

    // Generator: requests are built lazily, one per queued page number.
    $requests = function ($total) {
        foreach ($this->arr_req_code_36 as $key=> $item) {
            // The URI always comes from the prefix selected above, so the
            // genre listing is honoured (no hard-coded "/page/" URL here).
            $uri = $this->sp_uri.$item;
            echo "[当前($key) 总数($total)| =($item)-|]";
            yield new Request('GET', $uri );
        }
    };
    $this->sprequests = $requests($total);
    return true;
}

public function prepare_movie_rquests($requrl,$remove404=0){
$this->info("GET: $requrl before sql");
$response = $this->spclient->get($requrl);
$code = $response->getStatusCode();
if($code==200 || $code==404 ){
}else{
$this->error( __METHOD__ .":[$requrl |====链接无效]") ;
die;
}
$type = $response->getHeader('content-type');
$parsed = \GuzzleHttp\Psr7\parse_header($type);
$this->spcharset = isset($parsed[0]['charset']) ?$parsed[0]['charset']: 'UTF-8';

// $this->save_data($response);


$this->check_hosturl($requrl);
$sql = "select DISTINCT(censored_id) from avbook_avmoo_movie ";
$this->info($sql);
$result_code_avmoo = $this->database->query($sql)->fetchAll(\PDO::FETCH_COLUMN, 0);


$sql = "select DISTINCT(censored_id) from avbook_javbus_movie";
$this->info($sql);
$result_code_javbus = $this->database->query($sql)->fetchAll(\PDO::FETCH_COLUMN, 0);
Expand Down Expand Up @@ -377,20 +365,7 @@ public function prepare_movie_rquests($requrl,$remove404=0){

public function prepare_mag_rquests($requrl ,$pagenum = 1){
$this->update_ja_code_36("avbook_javbus_movie",'avbook_avmoo_movie');

$this->info("GET: $requrl before sql");
$response = $this->spclient->get($requrl);
$code = $response->getStatusCode();
if($code==200 || $code==404 ){
}else{
$this->error( __METHOD__ .":[$requrl |====链接无效]") ;
die;
}
$type = $response->getHeader('content-type');
$parsed = \GuzzleHttp\Psr7\parse_header($type);
$this->spcharset = isset($parsed[0]['charset']) ?$parsed[0]['charset']: 'UTF-8';
// $this->save_data($response);

$this->check_hosturl($requrl);
if ($pagenum==0){
// $sql = "select DISTINCT(gid) from {$this->table_prefix}movie where release_date > date_format(date_sub(now(),interval 1 year), '%Y-%m-%d') ";
$sql = "select DISTINCT(gid) from {$this->table_prefix}movie ";
Expand Down Expand Up @@ -643,25 +618,18 @@ public function get_info_magnet($response,$c_36=''){
public function get_info_page($response,$c_36=''){
$original_body = (string)$response->getBody();
$content = mb_convert_encoding($original_body, 'UTF-8', $this->spcharset);

$dom = new \DOMDocument();
@$dom->loadHTML($content);
$dom->normalize();
$xpath = new \DOMXPath($dom);

$nodeList = $xpath->query('//*[@class="photo-info"]');

$need_hd = '包含高清HD的磁力連結';
$need_sub = '包含字幕的磁力連結';

$t_i = 0;
foreach ($nodeList as $node) {
$str_node = $dom->saveHTML($node);

preg_match_all('#<date>(.*?)</date>#', $str_node, $outid);

if(!empty($outid[1][0])){

$sadd = '';
if (strpos($str_node, $need_hd)) {
$sadd .= ",have_hd=1";
Expand All @@ -672,14 +640,14 @@ public function get_info_page($response,$c_36=''){
$t_i = $t_i+1;
$m_time=$this->magnet_time-($c_36*30) -$t_i;
$t= date("Y-m-d H:i:s",$m_time);

$ssql = " update avbook_avmoo_movie set magnet_date = '$t', have_mg = 1 {$sadd} where censored_id ='{$outid[1][0]}'";
// echo $ssql .date("Y-m-d H:i:s",time())." \n";
// echo $outid[1][0],"|";
$this->database->query( $ssql);
}
}
}

public function get_info_movie($response,$c_36=''){
$original_body = (string)$response->getBody();
$content = mb_convert_encoding($original_body, 'UTF-8', $this->spcharset);
Expand Down Expand Up @@ -726,14 +694,9 @@ public function get_info_movie($response,$c_36=''){
$arr_data['Series'] = empty($out[1]) ? '' : implode(',',$out[1]);//'Series'

preg_match_all('#<span class="genre"><a href="'.$this->hosturl.'genre/(.*?)">#', $content, $out);

$arr_data['Genre'] = empty($out[1]) ? '' : '['.implode('][',$out[1]).']';//'Genre'


preg_match_all('#<a href="'.$this->hosturl.'star/(.*?)"><img src=#', $content, $out);

$arr_data['JAV_Idols'] = empty($out[1]) ? '' : '['.implode('][',$out[1]).']';

preg_match_all('#<a class="sample-box" href="(.*?)"><div class="photo-frame">#', $content, $out);
if (empty($out[1])) {
preg_match_all('#<div class="photo-frame"><img src="(.*?)" title#', $content, $out);
Expand All @@ -747,7 +710,6 @@ public function get_info_movie($response,$c_36=''){
}
preg_match_all('#class="movie-box" href="'.$this->hosturl.'(.*?)" style="display:inline-block; margin:5px;">#', $content, $out);
$arr_data['Similar'] = empty($out[1]) ? '' : '['.implode('][',$out[1]).']';//Similar
// var_dump($arr_data);die;
return $arr_data;
}
}
Loading

0 comments on commit 9f10371

Please sign in to comment.