做网站最重要的环节之一肯定是收录:页面没有收录,其他都是空谈,更不会有搜索流量。由于每个行业的网站众多,如何让搜索引擎第一时间发现你的网站页面并且收录呢?百度的主动推送操作不能丢。每当你在百度站长平台提交网站后,百度也会提示你去主动推送URL,让蜘蛛第一时间去抓取收录;收录上去了,SEO才有希望。对于海洋CMS这类专门做影视的程序,百度自动推送URL的功能并不完善,所以写一篇教程来实现。
1、在根目录新建一个map目录;
2、在map目录里新建一个index.php,文件代码内容如下:
<?php
/**
 * Sitemap entry point (map/index.php).
 *
 * Resolves the category id and page number from the request, then renders
 * the /map/channel.html template as an XML document via echoChannel().
 */
require_once(dirname(__FILE__)."/../include/common.php");

// Pre-redirect: depending on the mobile skin mode, mobile visitors are sent
// to the mobile host. Stop right after sending the Location header so the
// sitemap is not rendered for a response the client will discard.
$cs = $_SERVER["REQUEST_URI"];
if ($GLOBALS['cfg_mskin'] == 3 AND $GLOBALS['isMobile'] == 1) {
    header("location:$cfg_mhost$cs");
    exit;
}
if ($GLOBALS['cfg_mskin'] == 4 AND $GLOBALS['isMobile'] == 1) {
    header("location:$cfg_mhost");
    exit;
}

require_once(sea_INC."/main.class.php");
header('Content-Type:text/xml;charset=UTF-8');

if ($GLOBALS['cfg_runmode'] == 2 || $GLOBALS['cfg_paramset'] == 0) {
    // The query string has the form "<tid>" or "<tid>-<page>", optionally
    // followed by the configured file suffix.
    $paras = str_replace(getfileSuffix(), '', $_SERVER['QUERY_STRING']);
    if (strpos($paras, "-") > 0) {
        $parasArray = explode("-", $paras);
        $tid = $parasArray[0];
        $page = $parasArray[1];
    } else {
        $tid = intval($paras);
        $page = 1;
    }
} else {
    // The configuration names the variables that hold the ids. Resolve them
    // through an intermediate name: on PHP 7+ the expression
    // $$GLOBALS['cfg_paramid'] is evaluated as ($$GLOBALS)['cfg_paramid'],
    // which does not read the intended variable.
    // NOTE(review): presumably common.php exposes request parameters as
    // global variables under these names - confirm.
    $tidVarName = $GLOBALS['cfg_paramid'];
    $pageVarName = $GLOBALS['cfg_parampage'];
    $tid = isset($$tidVarName) ? $$tidVarName : null;
    $page = isset($$pageVarName) ? $$pageVarName : null;
}

// Anything missing or non-numeric falls back to category 0 / page 1.
$tid = (isset($tid) && is_numeric($tid)) ? intval($tid) : 0;
$page = (isset($page) && is_numeric($page)) ? intval($page) : 1;

// A tid of 0 is accepted here; no "missing parameter" error is raised.
// The key must be quoted: a bare tid is an undefined constant.
$GLOBALS['tid'] = $tid;
echoChannel($tid);
/**
 * Render the sitemap template for one category and echo the result.
 *
 * Counts the sea_data rows that belong to the category (by tid or through
 * v_extratype), loads the parsed template from the file cache or builds it,
 * fills in the paging and ordering placeholders and prints the content.
 *
 * @param int $typeId Category id.
 * @return void
 */
function echoChannel($typeId)
{
    global $dsql,$cfg_iscache,$mainClassObj,$page,$t1,$cfg_user,$cfg_basehost;

    $tplName = getTypeTemplate($typeId);
    if (empty($tplName)) {
        $tplName = "channel.html";
    }
    // The same template is used for every skin mode and device.
    $tplPath = "/map/channel.html";

    // Hidden-category and member-permission checks are not applied here.
    $perPage = getPageSizeOnCache($tplPath, "channel", $tplName);
    if (empty($perPage)) {
        $perPage = 12;
    }

    $typeIds = getTypeId($typeId);
    $typename = getTypeName($typeId);
    $extrasql = ($typeId != "") ? " or FIND_IN_SET('".$typeId."',v_extratype)<>0 " : "";

    $sql = "select count(*) as dd from sea_data where (tid in (".$typeIds.") ".$extrasql.")";
    $row = $dsql->GetOne($sql);
    $total = is_array($row) ? $row['dd'] : 0;
    $pageCount = ceil($total / $perPage);

    $currentTypeId = $typeId;
    $cacheName = "parse_channel_".$currentTypeId.$GLOBALS['cfg_mskin'].$GLOBALS['isMobile'];

    if ($cfg_iscache && chkFileCache($cacheName)) {
        $content = getFileCache($cacheName);
    } else {
        $content = parseChannelPart($tplPath, $currentTypeId);
        $content = str_replace("{channelpage:typename}", $typename, $content);
        $content = str_replace("{channelpage:typeid}", $currentTypeId, $content);
        if ($cfg_iscache) {
            setFileCache($cacheName, $content);
        }
    }

    // Page-dependent parts are filled in after the cache step.
    $content = str_replace("{channelpage:page}", $page, $content);
    $content = $mainClassObj->ParsePageList($content, $typeIds, $page, $pageCount, $total, "channel", $currentTypeId);
    $content = $mainClassObj->parseIf($content);
    $content = str_replace("{seacms:member}", front_member(), $content);

    // One search link per sort order, replaced in this fixed sequence.
    $orders = array(
        'hit', 'hitasc',
        'id', 'idasc',
        'time', 'timeasc',
        'commend', 'commendasc',
        'score', 'scoreasc',
    );
    foreach ($orders as $order) {
        $link = $cfg_basehost."/search.php?page=1&searchtype=5&order=".$order."&tid=".$typeId;
        $content = str_replace("{channelpage:order-".$order."-link}", $link, $content);
    }

    echo str_replace("{seacms:runinfo}", getRunTime($t1), $content);
}
/**
 * Load a template file and run the page-independent parsing steps on it.
 *
 * @param string $templatePath  Template path relative to sea_ROOT.
 * @param int    $currentTypeId Category id substituted into the template.
 * @return string The parsed template content.
 */
function parseChannelPart($templatePath,$currentTypeId)
{
    global $mainClassObj;

    $tpl = loadFile(sea_ROOT.$templatePath);
    $tpl = $mainClassObj->parseTopAndFoot($tpl);
    $tpl = str_replace("{seacms:currenttypeid}", $currentTypeId, $tpl);

    // Parser steps that take only the content, applied in this order.
    foreach (array('parseSelf', 'parseHistory', 'parseGlobal') as $step) {
        $tpl = $mainClassObj->$step($tpl);
    }

    $tpl = $mainClassObj->parseMenuList($tpl, "", $currentTypeId);
    $tpl = $mainClassObj->parseAreaList($tpl);
    $tpl = $mainClassObj->parseVideoList($tpl, $currentTypeId);
    $tpl = $mainClassObj->parseNewsList($tpl, $currentTypeId);
    $tpl = $mainClassObj->parseTopicList($tpl);

    // Category metadata placeholders: each value is fetched and substituted
    // before the next one is looked up.
    $typeFields = array(
        'typetext'    => 'getTypeText',
        'keywords'    => 'getTypeKeywords',
        'description' => 'getTypeDescription',
        'title'       => 'getTypeTitle',
    );
    foreach ($typeFields as $tag => $getter) {
        $tpl = str_replace('{channelpage:'.$tag.'}', $getter($currentTypeId), $tpl);
    }

    return $tpl;
}
?>
3、在map目录下新建一个channel.html文件,代码内容如下:
<?xml version="1.0" encoding="utf-8"?>
<!-- Sitemap template: the channellist loop below emits one url entry per item, newest first. -->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{seacms:channellist size=2000 order=time}
<url>
<loc>{seacms:siteurl}[channellist:link]</loc>
<lastmod>[channellist:time style=yyyy-mm-dd]</lastmod>
<changefreq>daily</changefreq>
<priority>0.8</priority>
</url>
{/seacms:channellist}
</urlset>
4、从xml文件取数据并做百度主动推送,代码如下(Python 2 脚本,需要先安装 requests 库):
#coding:utf-8
import requests,time,re,os
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
def main():
    """Fetch the sitemap, record URLs not seen before and push them to Baidu.

    Every ``<loc>`` URL in the sitemap that is not yet listed in
    ``all_url.txt`` is appended to that file and written, one URL per line,
    to ``yesterday_<n>.txt`` (and its ``www`` -> ``m`` variant to
    ``yesterday_m_<n>.txt``) in batches of at most 2000 URLs. Each batch is
    then POSTed to the Baidu link-submission endpoint for the PC and the
    mobile site. A failing batch is printed and skipped.
    """
    batch_size = 2000

    # Download the sitemap and extract every <loc> entry.
    sitemap_url = 'http://yp.jd.com/00/00_0.xml'
    r = requests.get(sitemap_url)
    zhishi_url = re.findall(r'<loc>(.*?)</loc>', r.content)

    # URLs pushed by earlier runs. The history file does not exist on the
    # very first run; a set keeps the membership test cheap.
    pushed = set()
    if os.path.exists('all_url.txt'):
        with open('all_url.txt') as f_all:
            pushed = set(line.strip() for line in f_all)

    f = open('all_url.txt', 'a+')               # every URL ever pushed
    f_ytd = open('yesterday_0.txt', 'w+')       # new URLs, current batch
    f_ytd_m = open('yesterday_m_0.txt', 'w+')   # same batch, mobile URLs
    num = 0
    txt_index = 0
    try:
        for link in zhishi_url:
            if link in pushed:
                continue
            pushed.add(link)  # also drops duplicates inside the sitemap

            # Start a new pair of batch files only when another URL
            # actually needs one, so no empty trailing batch is created.
            if num and num % batch_size == 0:
                f_ytd.close()
                f_ytd_m.close()
                txt_index += 1
                f_ytd = open('yesterday_%s.txt' % txt_index, 'w+')
                f_ytd_m = open('yesterday_m_%s.txt' % txt_index, 'w+')

            f.write(link + '\n')
            f_ytd.write(link + '\n')
            f_ytd_m.write(link.replace('www', 'm') + '\n')
            num += 1
    finally:
        f.close()
        f_ytd.close()
        f_ytd_m.close()

    print('yesterday has %s' % num)
    print('crawl done')
    time.sleep(5)

    # Push each batch to Baidu.
    print('push begin')
    headers = {'Content-Type': 'text/plain'}
    push_api = 'http://data.zz.baidu.com/urls'
    params = {'site': 'www.jd.com', 'token': '00'}    # ,'type':'original'
    params_m = {'site': 'm.jd.com', 'token': '00'}    # ,'type':'original'
    for i in range(txt_index + 1):
        try:
            with open('yesterday_%s.txt' % i, 'rb') as f_pc:
                pc_data = f_pc.read()
            with open('yesterday_m_%s.txt' % i, 'rb') as f_m:
                m_data = f_m.read()
            if not pc_data.strip():
                # Nothing new in this batch; do not send an empty request.
                continue
            r = requests.post(push_api, params=params, headers=headers,
                              data=pc_data)
            r_m = requests.post(push_api, params=params_m, headers=headers,
                                data=m_data)
            print('PC:' + r.content + ',' + 'M:' + r_m.content)
        except Exception as e:
            # Best effort: report the failure and go on with the next batch.
            print(e)
            continue
    print('Finish!!!')
if __name__ == '__main__':
    # Run main() once a day at 18:00 local time. The loop sleeps between
    # checks instead of spinning, and remembers the day of the last run so
    # the job fires exactly once even though the whole 18:00 minute matches.
    last_run_day = None
    while True:
        current_time = time.localtime(time.time())
        due = current_time.tm_hour == 18 and current_time.tm_min == 0
        if due and last_run_day != current_time.tm_yday:
            last_run_day = current_time.tm_yday
            main()
        time.sleep(1)
百度主动推送的好处就是主动,而sitemap是被动的,需要等搜索引擎蜘蛛自己来抓取。但需要注意的一点是,同一个URL不要重复推送,否则会影响蜘蛛抓取,浪费蜘蛛资源。除了推送,还有一个要注意的地方:海洋CMS的演员链接是动态URL,并不利于SEO,因此海洋CMS的伪静态设置好尤其重要,请参考:海洋seacms演员名称伪静态设置:自动布局长尾关键词
发表评论
还没有评论,快来抢沙发吧!