sitemapinc.inc.php 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  <?php
  /**
   * Incremental sitemap endpoint (sitemap type 2) of the "baidusubmit" plugin.
   *
   * Flow of this script:
   *   1. Reject the request with a 404 unless the `p` query parameter matches
   *      the plugin setting `sppasswd`.
   *   2. Look up the sitemap record for the requested `start` timestamp.
   *   3. Answer 304 when the timestamp carried in If-None-Match is already
   *      past the end of this sitemap's time window.
   *   4. Stream up to _MAX_THREAD_COUNT_ threads (selected by last-post time)
   *      as a <urlset> document, stopping before the body reaches 8 MiB.
   *   5. When the window did not fit into one document, shrink this sitemap
   *      record and register follow-up records for the remaining time range.
   *   6. Store item count, byte size and elapsed time on the sitemap record.
   *
   * The script is meant to be included; it uses `return 1` to stop early.
   */
  if (!defined('IN_DISCUZ')) {
      exit('Access Denied');
  }

  // Bind $_G before the first access. When this file is included from inside
  // a function, a later `global` statement would rebind the name and silently
  // discard anything written to $_G up to that point (e.g. the hook reset).
  global $_G;

  ini_set('memory_limit','512M');
  set_time_limit(300);

  // Authenticate the caller. The value must be a string and is compared
  // strictly, so numeric-looking strings ("1e3" vs "1000") cannot collide
  // through PHP's loose comparison rules.
  if (empty($_GET['p'])
      || !is_string($_GET['p'])
      || $_GET['p'] !== (string) baidu_get_plugin_setting('sppasswd')
  ) {
      baidu_header_status(404);
      return 1;
  }

  $startTime = isset($_GET['start']) ? intval($_GET['start']) : 0;
  $sitemap = baidu_get_sitemap(2, $startTime);
  if (empty($sitemap)) {
      baidu_header_status(404);
      return 1;
  }
  $endTime = $sitemap['end'];

  $client_etag = !empty($_SERVER['HTTP_IF_NONE_MATCH']) ? trim($_SERVER['HTTP_IF_NONE_MATCH']) : false;
  if ($client_etag) {
      // The ETag sent by this script is a Unix timestamp; intval() keeps the
      // leading digits and drops any trailing characters.
      $client_etag_max_time = intval($client_etag);
      // Resume from the newest timestamp the client has already seen.
      if ($client_etag_max_time > $startTime) {
          $startTime = $client_etag_max_time;
      }
      // The client already fetched this sitemap after its window ended.
      if ($client_etag_max_time > $endTime) {
          $etag = $client_etag_max_time;
          header('HTTP/1.1 304 Not Modified');
          header('ETag: '.$etag);
          return 1;
      }
  }

  // Clear the registered plugin hook functions.
  $_G['setting']['plugins']['func'] = array();

  // The current time becomes the ETag of this response.
  $etag = time();
  header('ETag: '.$etag);

  define('_MAX_THREAD_COUNT_', 5000);
  $threadlist = C::t('#baidusubmit#forum_thread_baidu')->get_thread_by_lastpost($startTime, $endTime, _MAX_THREAD_COUNT_);

  // A full page of threads means the time window may hold more than was fetched.
  $indexsplitsitemap = false;
  $threadCount = count($threadlist);
  if ($threadCount >= _MAX_THREAD_COUNT_) {
      $indexsplitsitemap = true;
  }

  $itemCount = 0;
  $fileSize = 0;

  header('Content-Type: text/xml');
  echo '<?xml version="1.0" encoding="UTF-8"?><urlset>';

  $sizesplitsitemap = false;
  $forumlist = baidu_get_forum_list();
  foreach ($threadlist as $tid => $thread) {
      // Pass null for an unknown forum id instead of raising an undefined-index notice.
      $forum = isset($forumlist[$thread['fid']]) ? $forumlist[$thread['fid']] : null;
      $output = baidu_schema_body_build($forum, $thread);
      if ($output === false) {
          continue;
      }

      // Stop before the emitted body reaches 8 MiB. $thread keeps pointing at
      // the first thread that was NOT written; the split below starts there.
      $fileSizeCheck = $fileSize + strlen($output);
      if ($fileSizeCheck >= 1024*1024*8) {
          $sizesplitsitemap = true;
          break;
      }

      // Count the entry only once it is really part of the output.
      $itemCount += 1;
      $fileSize = $fileSizeCheck;
      echo $output;
      flush();
  }
  echo '</urlset>';

  // Split the sitemap record when the window exceeded the size limit or the
  // thread limit. $thread is always set here: both flags can only be true
  // after the loop above ran at least once.
  if ($sizesplitsitemap || ($indexsplitsitemap && $thread['lastpost'] < $endTime)) {
      $sp = baidu_get_sitemap(2, $startTime, $endTime);
      if ($sp) {
          // Everything from this timestamp on still has to be served.
          $newStartTime = $thread['lastpost'];
          $curTime = time();

          // Width of each follow-up window: 30% of the span just processed.
          $stepLen = intval(($newStartTime - $startTime - 1) * 0.3);
          if ($stepLen < 1) {
              // A span of only a few seconds yields a step of 0, which would
              // divide by zero below. Fall back to one window that covers
              // $newStartTime..$curTime so no time range is left unassigned.
              $stepLen = max(1, $curTime - $newStartTime + 1);
          }

          // Only create windows up to the current time.
          $count = ceil(($curTime - $newStartTime) / $stepLen);
          for ($i = 0; $i < $count; $i++) {
              $_xstart = $newStartTime + $stepLen * $i;
              $_xend = $_xstart + $stepLen - 1;
              if ($_xend > $curTime) {
                  $_xend = $curTime;
              }
              $url = "sitemapinc&start={$_xstart}";
              C::t('#baidusubmit#baidusubmit_sitemap')->add($url, 2, $_xstart, $_xend);
          }

          // Add the final window, starting right after the current time.
          $nextTime = $curTime + 1;
          C::t('#baidusubmit#baidusubmit_sitemap')->add("sitemapinc&start={$nextTime}", 2, $nextTime, $endTime);

          // Shrink the record that was looked up so it ends just before the split point.
          $newEndTime = $newStartTime - 1;
          $newUrl = "sitemapinc&start={$startTime}";
          C::t('#baidusubmit#baidusubmit_sitemap')->update_by_sid(
              $sp['sid'],
              array('url' => $newUrl, 'start' => $startTime, 'end' => $newEndTime)
          );
          $endTime = $newEndTime;
      }
  }

  // Record statistics for this run; elapsed time is stored in milliseconds.
  $timeLost = intval(1000 * (microtime(true) - $_G['starttime']));
  C::t('#baidusubmit#baidusubmit_sitemap')->update_by_sid(
      $sitemap['sid'],
      array('item_count' => $itemCount, 'file_size' => $fileSize, 'lost_time' => $timeLost)
  );
  baidu_update_url_stat($itemCount);