sitemapall.inc.php 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. <?php
  2. if (!defined('IN_DISCUZ')) {
  3. exit('Access Denied');
  4. }
  5. ini_set('memory_limit','512M');
  6. set_time_limit(300);
  7. if (empty($_GET['p']) || $_GET['p'] != baidu_get_plugin_setting('sppasswd')) {
  8. baidu_header_status(404);
  9. return 1;
  10. }
  11. $start_tid = intval($_GET['start']);
  12. $sitemap = baidu_get_sitemap(1, $start_tid);
  13. if (empty($sitemap)) {
  14. baidu_header_status(404);
  15. return 1;
  16. }
  17. $end_tid = $sitemap['end'];
  18. $client_etag = !empty($_SERVER['HTTP_IF_NONE_MATCH']) ? trim($_SERVER['HTTP_IF_NONE_MATCH']) : false;
  19. if ($client_etag) {
  20. $client_etag_max_tid = intval($client_etag); //取整,去掉后面的无效字符
  21. //起始id设成上次的最大值
  22. if ($client_etag_max_tid > $start_tid) {
  23. $start_tid = $client_etag_max_tid;
  24. }
  25. if ($client_etag_max_tid >= $end_tid) { //抓取过的数据再次抓取的时候
  26. $etag = $client_etag;
  27. header('HTTP/1.1 304 Not Modified');
  28. header('ETag: ' . $etag);
  29. return 1;
  30. }
  31. }
  32. //清掉钩子
  33. $_G['setting']['plugins']['func'] = array();
  34. //设成最后的值
  35. $etag = $end_tid;
  36. header('ETag: ' . $etag);
  37. $threadlist = C::t('#baidusubmit#forum_thread_baidu')->get_thread_by_tidrange($start_tid, $end_tid);
  38. $itemCount = 0;
  39. $fileSize = 0;
  40. $urlnum = 0;
  41. $installmaxtid = baidu_get_plugin_setting('installmaxtid');
  42. global $_G;
  43. header('Content-Type: text/xml');
  44. echo '<?xml version="1.0" encoding="UTF-8"?><urlset>';
  45. $forumlist = baidu_get_forum_list();
  46. foreach ($threadlist as $tid => $thread) {
  47. $output = baidu_schema_body_build($forumlist[$thread['fid']], $thread);
  48. if ($output === false) {
  49. continue;
  50. }
  51. $fileSizeCheck = $fileSize + strlen($output);
  52. $itemCountCheck = $itemCount + 1;
  53. // split sitemap file
  54. if ($fileSizeCheck >= 1024 * 1024 * 8 || $itemCountCheck > 5000) {
  55. // 并发问题
  56. $sp = baidu_get_sitemap(1, $start_tid, $end_tid);
  57. if ($sp) {
  58. $new_start_tid = $thread['tid'];
  59. $past_tid = $new_start_tid - $start_tid - 1;
  60. $count = ceil(($end_tid - $new_start_tid) / $past_tid);
  61. for ($i = 0; $i < $count; $i++) {
  62. $_xstart = $new_start_tid + $past_tid * $i;
  63. $_xend = $_xstart + $past_tid - 1;
  64. if ($_xend > $end_tid) {
  65. $_xend = $end_tid;
  66. }
  67. $url = "sitemapall&start={$_xstart}";
  68. C::t('#baidusubmit#baidusubmit_sitemap')->add($url, 1, $_xstart, $_xend);
  69. }
  70. $new_end_tid = $new_start_tid - 1;
  71. $new_url = "sitemapall&start={$start_tid}";
  72. C::t('#baidusubmit#baidusubmit_sitemap')->update_by_sid(
  73. $sp['sid'], array('url' => $new_url, 'start' => $start_tid, 'end' => $new_end_tid));
  74. $end_tid = $new_end_tid;
  75. }
  76. break;
  77. }
  78. echo $output;
  79. $fileSize = $fileSizeCheck;
  80. $itemCount = $itemCountCheck;
  81. if ($tid <= $installmaxtid) {
  82. $urlnum ++;
  83. }
  84. flush();
  85. }
  86. echo '</urlset>';
  87. $timeLost = intval(1000 * (microtime(true) - $_G['starttime']));
  88. C::t('#baidusubmit#baidusubmit_sitemap')->update_by_sid(
  89. $sitemap['sid'], array('item_count' => $itemCount, 'file_size' => $fileSize, 'lost_time' => $timeLost));
  90. baidu_update_url_stat($urlnum);