这个函数很不错,curl函数中的header还可以加入随机UA等功能防屏蔽,以后改改做自己的采集类!

/**
 * Fetch one page of the paginated ASP.NET listing by replaying the pager's form POST.
 *
 * When $EVENTARGUMENT is empty the request is still a POST, but with no form
 * fields. Otherwise the four ASP.NET postback fields are sent together with
 * the fixed search-form values below.
 *
 * @param string $EVENTARGUMENT   Sent as __EVENTARGUMENT (presumably the target page number - confirm against the caller).
 * @param string $VIEWSTATE       Sent as __VIEWSTATE.
 * @param string $EVENTVALIDATION Sent as __EVENTVALIDATION.
 * @param string $EVENTTARGET     Sent as __EVENTTARGET; defaults to "pager".
 * @return string|bool Response headers followed by the body (CURLOPT_HEADER is on),
 *                     or false when the transfer fails.
 */
function getn($EVENTARGUMENT = "", $VIEWSTATE = "", $EVENTVALIDATION = "", $EVENTTARGET = "pager"){
    $args = array();
    if($EVENTARGUMENT){
        $args = array(
            '__EVENTTARGET'=>$EVENTTARGET,
            '__EVENTARGUMENT'=>$EVENTARGUMENT,
            '__VIEWSTATE'=>$VIEWSTATE,
            '__EVENTVALIDATION'=>$EVENTVALIDATION,
            '__VIEWSTATEENCRYPTED'=>'',
            // The key used to be 'search$txtFundName=' - the stray '=' made it a
            // different field name from the one every sibling key pattern implies.
            'search$txtFundName'=>'',
            'search$txtFundManger'=>'',
            'search$ddlFoundationDateOperater'=>'1',
            'search$txtFoundationDate'=>'',
            'search$dltFundType$ctl01$chkFundType'=>'on',
            'search$dltFundType$ctl01$chklFundChildType$0'=>'on',
            'search$dltFundType$ctl01$chklFundChildType$1'=>'on',
            'search$dltFundType$ctl01$chklFundChildType$2'=>'on',
            'search$dltFundType$ctl01$chklFundChildType$3'=>'on',
            'search$dltFundType$ctl01$chklFundChildType$4'=>'on',
            'search$chklFundStatus$0'=>'on',
            'search$ddlFundOrg'=>'0',
            'search$txtFundOrgName'=>'',
            'search$ddlStatisticDateOperater'=>'1',
            'search$txtStatisticDate'=>'',
            'search$radlStatisticMode'=>'1'
        );
    }

    $user_agent = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.79 Safari/535.11";

    $ch = curl_init();
    curl_setopt_array($ch, array(
        CURLOPT_URL            => 'http://???/default.aspx',
        CURLOPT_RETURNTRANSFER => 1,     // return the response instead of printing it
        CURLOPT_VERBOSE        => TRUE,
        CURLOPT_AUTOREFERER    => TRUE,
        CURLOPT_FAILONERROR    => TRUE,
        CURLOPT_FOLLOWLOCATION => TRUE,
        CURLOPT_HEADER         => TRUE,  // response headers are part of the returned string
        CURLINFO_HEADER_OUT    => TRUE,
        CURLOPT_HTTPHEADER     => array(
            'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language:zh-CN,zh;q=0.8',
            'Connection: Keep-Alive',
            'Cache-Control:max-age=0',
            'Referer:http://???/default.aspx',
            'Expect:'                    // empty value removes curl's automatic "Expect: 100-continue"
        ),
        CURLOPT_POST           => true,  // submit as POST
        CURLOPT_POSTFIELDS     => $args, // form fields (empty array on the first request)
        CURLOPT_USERAGENT      => $user_agent,
    ));

    $document = curl_exec($ch);
    // Release the handle; the original returned without closing it, leaking one
    // connection resource per fetched page.
    curl_close($ch);
    return $document;
}
 
/**
    QQ群:223494678
    函数:根据模拟post所得的页面信息,提取所需post的数据和分页,最后分解需要的html
    返回:
        string
/**/
function getHtml(){
    global $html;
    $first = getn();
    preg_match('/(\d+?)<\/b><\/font> 页<\/span>/is', $first, $matches);
    $total = $matches[1];
    preg_match('/

说明:

getHtml是採集入口文件,裏面先取每一頁的數據,並提取頁碼等數據,然後循環採集後面的數據,getn是採集函數,主要是CURL模擬POST了。

转自:

http://www.cnblogs.com/see7di/p/3428354.html

转自http://www.cnblogs.com/see7di/p/3560329.html

此代码片段的功能是在做均衡负载的时候将session的维护工作交给Memcached,这样不仅性能上来了,session不同步的问题也解决了!

但是要求做Memcache的计算机内存要足够大!

// Memcache settings consumed by the session handler class S below.
// Note: this assignment replaces the whole $_ENV array.
$_ENV = [
    'SYS' => [
        'Memip'  => '127.0.0.1', // Memcache server IP
        'Mempt'  => 11211,       // Memcache server port
        'Memtim' => 10,          // Memcache timeout (S::write passes it to Memcache::set)
    ],
];
 
// Session save handler that keeps session data in Memcache. If open() dies,
// Memcache is not configured correctly (see $_ENV['SYS']).
final class S{
    /**
     * "open" callback: adjusts session ini settings and connects to Memcache.
     * The connection object is stored in $_ENV['S_mem'] for the other callbacks.
     *
     * @return bool Always TRUE; a failed connection terminates the script via die().
     */
    public static function open(){
        // Make sure sessions are not started automatically.
        session_write_close();
        ini_set('session.auto_start',0);

        // Run session GC with a probability of 1 in 50.
        ini_set('session.gc_probability',1);
        ini_set('session.gc_divisor',50);
        ini_set('session.use_cookies',1);

        // Alternative lifetime / storage settings, intentionally left disabled:
        //ini_set('session.gc_maxlifetime',$_ENV['SYS']['Memtim']);
        //ini_set('session.cookie_lifetime',$_ENV['SYS']['Memtim']);
        //ini_set('session.save_handler','files');
        //ini_set('session.save_path',Run.'_tmp');
        //ini_set('session.save_handler','memcache');
        //ini_set('session.save_path','tcp://127.0.0.1:11211');
        //$_ENV['S_tim'] = ini_get('session.gc_maxlifetime');

        // Create the Memcache client and connect using the configured host/port.
        $_ENV['S_mem']=new Memcache;
        $_ENV['S_mem']->connect($_ENV['SYS']['Memip'],$_ENV['SYS']['Mempt']) or die('Memcache连接失败!');
        return TRUE;
    }

    /**
     * "read" callback.
     *
     * @param string $id Session id; the Memcache key is 's_' . $id.
     * @return string Stored session payload, or '' when the key does not exist.
     */
    public static function read($id){
        $data = $_ENV['S_mem']->get('s_'.$id);
        // Memcache::get() yields false on a miss, but the session read callback
        // must return a string - returning false makes session_start() fail.
        return is_string($data) ? $data : '';
    }

    /**
     * "write" callback: stores the payload compressed, expiring after
     * $_ENV['SYS']['Memtim'].
     *
     * @param string $id   Session id.
     * @param string $data Serialized session data.
     * @return bool Result of Memcache::set().
     */
    public static function write($id,$data){
        return $_ENV['S_mem']->set('s_'.$id,$data,MEMCACHE_COMPRESSED,$_ENV['SYS']['Memtim']);
    }

    /**
     * "close" callback: closes and discards the Memcache connection.
     *
     * @return bool Always TRUE.
     */
    public static function close(){
        $_ENV['S_mem']->close();
        // Only the connection is dropped. The original also unset
        // $_ENV['SYS']['Memtim'], so a session reopened later in the same
        // request was written without an expiry value.
        unset($_ENV['S_mem']);
        return TRUE;
    }

    /**
     * "destroy" callback.
     *
     * @param string $id Session id.
     * @return bool Result of Memcache::delete().
     */
    public static function destroy($id){
        return $_ENV['S_mem']->delete('s_'.$id);
    }

    /**
     * "gc" callback: nothing to do here, entries are written with an expiry.
     *
     * @return bool Always TRUE.
     */
    public static function gc(){
        return TRUE;
    }
}
// Route all session storage through the Memcache-backed callbacks of class S.
session_set_save_handler('S::open','S::close','S::read','S::write','S::destroy','S::gc');

// A session id may be handed over by the client in the "sessionid" request
// field. It is accepted only when it is a plain string built from the
// characters PHP itself uses for session ids: an array value would break
// trim(), and arbitrary characters would end up inside the Memcache key.
// NOTE(review): honouring a client-chosen id at all allows session fixation -
// confirm this hand-over is really required.
$_ENV['sessionid'] = (isset($_REQUEST['sessionid']) && is_string($_REQUEST['sessionid']))
    ? trim($_REQUEST['sessionid'])
    : '';
if (preg_match('/^[A-Za-z0-9,-]{1,128}$/', $_ENV['sessionid']) !== 1) {
    // Missing or malformed id: fall back to whatever PHP currently has.
    $_ENV['sessionid'] = session_id();
}
if($_ENV['sessionid']!=''){session_id($_ENV['sessionid']);}
session_start();

做采集的都知道,一般采集过来的内容难免会带有html标签,如果有太多的标签会影响之后的数据分析或提取,所以需要过滤掉!PHP已经为我们提供了很多清除html格式的方法了,下面就让老高介绍一下。

strip_tags

strip_tags($str) 去掉 HTML 及 PHP 的标记 语法: string strip_tags(string str); 传回值: 字串 函式种类: 资料处理 内容说明 : 解析:本函式可去掉字串中包含的任何 HTML 及 PHP 的标记字串。若是字串的 HTML 及 PHP 标签原来就有错,例如少了大于的符号,则也会传回错误。这个函数和 fgetss() 有着相同的功能

例子

// The markup inside the sample string had been lost, so the example showed nothing being stripped.
echo strip_tags("Hello <b>world!</b>");
# Hello world!

htmlspecialchars

这个函数把html中的标签转换为html实体,博客的代码展示就必须使用这个函数,要不贴出来的代码就会被执行了。 预定义的字符是: & (和号) 成为 &amp;amp; " (双引号) 成为 &amp;quot; ' (单引号) 成为 &amp;#039; < (小于) 成为 &amp;lt; > (大于) 成为 &amp;gt;

例子

// The sample input needs real markup, otherwise there is nothing to escape.
$new = htmlspecialchars("<a href='test'>Test</a>", ENT_QUOTES);
echo $new;
# &lt;a href=&#039;test&#039;&gt;Test&lt;/a&gt;
# A literal <br> in a page would be rendered by the browser as a line break;
# after htmlspecialchars() the < becomes &lt;, so the tag is displayed as text instead.

与htmlspecialchars功能相反的函数是htmlspecialchars_decode,他会把HTML实体转化为字符!

后补函数

PHP去除html、css样式、js格式的方法很多,但发现,它们基本都有一个弊端:空格往往清除不了 经过不断的研究,最终找到了一个理想的去除html包括空格css样式、js 的PHP函数。

/**
 * Remove HTML/CSS/JS markup from scraped content and defuse script-like text.
 *
 * Steps, in order: drop CR/LF/TAB, collapse whitespace runs to one space,
 * strip comments and document-level tags, strip applet/style/title/object/
 * noframes/frame/script elements including their content, re-case
 * "javascript", "vbscript", "cookie" and on*= handlers, break numeric
 * character references, and finally strip a fixed list of inline/layout tags.
 * Tags outside those lists (for example <p>) are left in place.
 *
 * @param string|array $html Markup to clean (arrays are processed element-wise
 *                           by str_replace/preg_replace).
 * @return string|array Cleaned text; '' when PCRE reports an error.
 */
function stripHtmlMarkup($html)
{
    // CR, LF and TAB are removed outright. The former separate "\r\n" pass was
    // dead code because both characters were already gone by then.
    $html = str_replace(["\r", "\n", "\t"], '', $html);

    // pattern => replacement, applied top to bottom.
    $rules = [
        '/\s+/'                                          => ' ',          // collapse remaining whitespace
        '/<[ ]+/si'                                      => '<',          // "<   tag" -> "<tag"
        '/<\!--.*?-->/si'                                => '',           // HTML comments
        '/<(\!.*?)>/si'                                  => '',           // DOCTYPE and other <! ... >
        '/<(\/?html.*?)>/si'                             => '',
        '/<(\/?head.*?)>/si'                             => '',
        '/<(\/?meta.*?)>/si'                             => '',
        '/<(\/?body.*?)>/si'                             => '',
        '/<(\/?link.*?)>/si'                             => '',
        '/<(\/?form.*?)>/si'                             => '',
        '/cookie/si'                                     => 'COOKIE',
        '/<(applet.*?)>(.*?)<(\/applet.*?)>/si'          => '',           // element with content
        '/<(\/?applet.*?)>/si'                           => '',           // stray open/close tag
        '/<(style.*?)>(.*?)<(\/style.*?)>/si'            => '',
        '/<(\/?style.*?)>/si'                            => '',
        '/<(title.*?)>(.*?)<(\/title.*?)>/si'            => '',
        '/<(\/?title.*?)>/si'                            => '',
        '/<(object.*?)>(.*?)<(\/object.*?)>/si'          => '',
        '/<(\/?objec.*?)>/si'                            => '',
        '/<(noframes.*?)>(.*?)<(\/noframes.*?)>/si'      => '',
        '/<(\/?noframes.*?)>/si'                         => '',
        '/<(i?frame.*?)>(.*?)<(\/i?frame.*?)>/si'        => '',
        '/<(\/?i?frame.*?)>/si'                          => '',
        '/<(script.*?)>(.*?)<(\/script.*?)>/si'          => '',
        '/<(\/?script.*?)>/si'                           => '',
        '/javascript/si'                                 => 'Javascript',
        '/vbscript/si'                                   => 'Vbscript',
        '/on([a-z]+)\s*=/si'                             => 'On\\1=',     // onclick= -> Onclick=
        // Break numeric character references such as &#106; by swapping in a
        // full-width '#'. The former rule replaced "&#" with itself and so did nothing.
        '/&#/si'                                         => '&＃',
        // Final sweep over a fixed list of tag names.
        '/<(\/?)(script|i?frame|style|html|body|li|i|map|title|img|link|span|u|font|table|tr|b|marquee|td|strong|div|a|meta|\?|\%)([^>]*?)>/isU' => '',
    ];

    $clean = preg_replace(array_keys($rules), array_values($rules), $html);

    // preg_replace() returns null on a PCRE error; the chained calls this
    // replaces ended up with an empty string in that case.
    return $clean === null ? '' : $clean;
}

// Script usage kept as before: $descclear is cleaned in place.
$descclear = stripHtmlMarkup(isset($descclear) ? $descclear : '');

总结

采集这个东西说简单很简单,但说难真的很难。一旦遇到错误,就会让人很抓狂!

想要成为采集高手,你不仅需要了解从一个计算机发出的基于TCP的HTTP请求到最终得到请求的文件的整个过程,而且能够使用一系列的工具来协助你跟踪数据的去处,同时需要考虑你写出的采集任务的效率!

如果你需要采集twitter或者Facebook的数据,可以参考使用shadowsocks轻松搭建FQ环境

代码网上参考的,一共有两种,自己优化了一下:

1.离线方法

参考:http://www.iganlei.cn/demo/186.html

 -7; $i -= 7) //从最后往前以7字节为一组读取mid
        {
        $offset1 = $i < 0 ? 0 : $i;
        $offset2 = $i + 7;
        $num     = substr($mid, $offset1, $offset2 - $offset1);
        $num     = int10to62($num);
        $url     = $num . $url;
    }
    return $url;
}

/**
 * Build the weibo.com address of a post.
 *
 * @param string $uid Path segment placed right after the host.
 * @param string $mid Numeric mid; converted with getCodeByMid() into the last path segment.
 * @return string URL of the form http://weibo.com/<uid>/<code>.
 */
function getNewUrl($uid,$mid)
{
    return sprintf('http://weibo.com/%s/%s', $uid, getCodeByMid($mid));
}

// Demo call.
echo getNewUrl('phpgao', '3524952365496186');

2.api方法

参考:http://blog.csdn.net/k1988/article/details/6684114

mid说明:http://open.weibo.com/wiki/Querymid

// Step 1: ask the API for the numeric id behind the base62 mid "xhMRc8nNu".
// file_get_contents() yields false on failure; the (string) cast turns that
// into '' so json_decode() returns null and the check below catches it.
$re = json_decode((string) file_get_contents("http://api.t.sina.com.cn/queryid.json?mid=xhMRc8nNu&isBase62=1&type=1"));
if (!is_object($re) || !isset($re->id)) {
    die('queryid request failed or returned no id');
}
$id = $re->id;
echo $id;

// Step 2: convert the numeric id back into its mid.
$re = json_decode((string) file_get_contents("http://api.t.sina.com.cn/querymid.json?id=" . rawurlencode($id)));
if (!is_object($re) || !isset($re->mid)) {
    die('querymid request failed or returned no mid');
}
echo $re->mid;
exit;

搜PHP长链接把这个搜出来了,2006年的帖子比较久远了,希望对你有帮助!

转自http://bbs.phpchina.com/forum.php?mod=viewthread&tid=4577&page=1 中forest的回帖:

永久的数据库连接是指在脚本结束运行时不关闭的连接。当收到一个永久连接的请求时。PHP 将检查是否已经存在一个(前面已经开启的)相同的永久连接。如果存在,将直接使用这个连接;如果不存在,则建立一个新的连接。所谓“相同”的连接是指用相同的用户名和密码到相同主机的连接。

对 web 服务器的工作和分布负载没有完全理解的读者可能会错误地理解永久连接的作用。特别的,永久连接不会在相同的连接上提供建立“用户会话”的能力,也不提供有效建立事务的能力。实际上,从严格意义上来讲,永久连接不会提供任何非永久连接无法提供的特殊功能。

为什么?

这和 web 服务器工作的方式有关。web 服务器可以用三种方法来利用 PHP 生成 web 页面。

第一种方法是将 PHP 用作一个“外壳”。以这种方法运行,PHP 会为向 web 服务器提出的每个 PHP 页面请求生成并结束一个 PHP 解释器线程。由于该线程会随每个请求的结束而结束,因此任何在这个线程中利用的任何资源(例如指向 SQL 数据库服务器的连接)都会随线程的结束而关闭。在这种情况下,使用永久连接不会获得任何地改变――因为它们根本不是永久的。

第二,也是最常用的方法,是把 PHP 用作多进程 web 服务器的一个模块,这种方法目前只适用于 Apache。对于一个多进程的服务器,其典型特征是有一个父进程和一组子进程协调运行,其中实际生成 web 页面的是子进程。每当客户端向父进程提出请求时,该请求会被传递给还没有被其它的客户端请求占用的子进程。这也就是说当相同的客户端第二次向服务端提出请求时,它将有可能被一个不同的子进程来处理。在开启了一个永久连接后,所有请求 SQL 服务的后继页面都能够重新使用这个已经建立的 SQL Server 连接。

最后一种方法是将 PHP 用作多线程 web 服务器的一个插件。目前 PHP 4 已经支持 ISAPI、WSAPI 和 NSAPI(在 Windows 环境下),这些使得 PHP 可以被用作诸如 Netscape FastTrack (iPlanet)、Microsoft’s Internet Information Server (IIS) 和 O’Reilly’s WebSite Pro 等多线程 web 服务器的插件。永久连接的行为和前面所描述的多过程模型在本质上是相同的。注意 PHP 3 不支持 SAPI。

如果永久连接并没有任何附加的功能,那么使用它有什么好处?

答案非常简单――效率。当客户端对 SQL 服务器的连接请求非常频繁时,永久连接将更加高效。连接请求频繁的标准取决于很多因素。例如,数据库的种类,数据库服务和 web 服务是否在同一台服务器上,SQL 服务器如何加载负载等。但我们至少知道,当连接请求很频繁时,永久连接将显著的提高效率。它使得每个子进程在其生命周期中只做一次连接操作,而非每次在处理一个页面时都要向 SQL 服务器提出连接请求。这也就是说,每个子进程将对服务器建立各自独立的永久连接。例如,如果有 20 个不同的子进程运行某脚本建立了永久的 SQL 服务器永久连接,那么实际上向该 SQL 服务器建立了 20 个不同的永久连接,每个进程占有一个。

注意,如果永久连接的子进程数目超过了设定的数据库连接数限制,系统将会产生一些缺陷。如果数据库的同时连接数限制为 16,而在繁忙会话的情况下,有 17 个线程试图连接,那么有一个线程将无法连接。如果这个时候,在脚本中出现了使得连接无法关闭的错误(例如无限循环),则该数据库的 16 个连接将迅速地受到影响。请查阅使用的数据库的文档,以获取关于如何处理已放弃的及闲置的连接的方法。 警告

在使用永久连接时还有一些特别的问题需要注意。例如在永久连接中使用数据表锁时,如果脚本不管什么原因无法释放该数据表锁,其随后使用相同连接的脚本将会被永久的阻塞,使得需要重新启动 httpd 服务或者数据库服务。另外,在使用事务处理时,如果脚本在事务阻塞产生前结束,则该阻塞也会影响到使用相同连接的下一个脚本。不管在什么情况下,都可以通过使用 register_shutdown_function() 函数来注册一个简单的清理函数来打开数据表锁,或者回滚事务。或者更好的处理方法,是不在使用数据表锁或者事务处理的脚本中使用永久连接,这可以从根本上解决这个问题(当然还可以在其它地方使用永久连接)。

最近在做采集微博的功能,由于要自动采集,所以必须获得最终的用户的token,获取了这个token以后就可以为所欲为啦!

贴代码

// Obtain a Weibo OAuth2 access token without user interaction:
//   1. POST the login form to the authorize endpoint and follow redirects;
//   2. read the "code" parameter from the final redirect URL;
//   3. exchange that code for a token through the SDK.
// On success the token is returned from this (included) file; every failure ends in die().
$s = new SaeTOAuthV2 ( WB_AKEY, WB_SKEY);

// Form fields for the login POST. Other fields the login page carries
// (display, withOfficalFlag, quick_auth, withOfficalAccount, scope, ticket,
// isLoginSina, regCallback, appkey62, state, verifyToken, from) are not sent;
// regCallback and appkey62 would need a preliminary request to obtain.
$post ['action'] = 'login';
// No urlencode here - http_build_query() encodes the values.
$post ['redirect_uri'] = WB_CALLBACK_URL;
$post ['client_id'] = WB_AKEY;
$post ['userId'] = '*******';// test account name
$post ['passwd'] = '*******';// test password; avoid symbols such as !@¥%……&*(), letters/digits only, or verification fails
$post = http_build_query($post);

// The request seems to work without a User-Agent, but send one to be safe.
$UA = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36';

// The SDK's authorize URL doubles as the Referer header.
$url = $s->getAuthorizeURL (WB_CALLBACK_URL);
//echo $url."\n";

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'https://api.weibo.com/oauth2/authorize');
// Certificate checks stay on: this request carries the account password, and
// disabling verification would let any man-in-the-middle read it.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Follow redirects so the effective URL ends up being the callback carrying the code.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_REFERER, $url);
curl_setopt($ch, CURLOPT_USERAGENT, $UA);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
$response = curl_exec($ch);
if ($response === false) {
    // Read the error before the handle is closed.
    $curl_error = curl_error($ch);
    curl_close($ch);
    die('authorize request failed: ' . $curl_error);
}
// Last URL reached after all redirects.
$token_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
echo $token_url."\n";
curl_close($ch);

// Capture only the value of the code parameter. The former greedy "code=(.*)"
// also swallowed any parameters or fragment that followed it.
$code = preg_match('#[?&]code=([^&\#]+)#',$token_url,$match_url);
if($code>=1)
{
	$code = $match_url[1];
	$keys = array();
	$keys['code'] = trim($code);
	$keys['redirect_uri'] = WB_CALLBACK_URL;
	try 
	{
		$token = $s->getAccessToken( 'code', $keys );
	} catch (OAuthException $e) 
	{
		die($e->getMessage());
	}
	return $token;
}else
{
	die("code获取失败");
}

思路很简单,直接给 https://api.weibo.com/oauth2/authorize 发送登录信息,通过认证后,他会返回给你一个跳转URL,并带有一个code变量,这个code还是与开发者绑定的。

然后使用这个code,与服务器的 https://api.weibo.com/oauth2/access_token 接口通信(即上面代码中的 getAccessToken 调用),返回的才是最终你需要的token,这个token同时与用户绑定,将他保存在session里随时调用!

思路参考:

http://hi.baidu.com/bing008ok/item/0a2bce068d1f8e0aeafe385d

新装的机子执行一下,请酌情使用,出问题后果自负。

优化设置

This script is only for 64bit Operating System ! 64位系统专用,老高有修改

#!/bin/bash
#author suzezhi
# Post-install tuning for CentOS 6 x86_64.
# This first part refuses to continue on any other platform or release.

# Machine architecture; quoted below so an empty value cannot break the test.
platform=$(uname -m)
if [ "$platform" != "x86_64" ]; then
    echo "this script is only for 64bit Operating System !"
    exit 1
fi
echo "the platform is ok"

# Major release number. lsb_release is not installed on minimal systems; with
# the old unquoted test an empty value made `[` error out and the script
# carried on regardless. Fall back to the release file instead.
if command -v lsb_release >/dev/null 2>&1; then
    version=$(lsb_release -r | awk '{print substr($2,1,1)}')
else
    version=$(sed -n 's/.*release \([0-9]\).*/\1/p' /etc/redhat-release 2>/dev/null)
fi
if [ "$version" != "6" ]; then
    echo "this script is only for CentOS 6 !"
    exit 1
fi
cat << EOF
+---------------------------------------+
|   your system is CentOS 6 x86_64      |
|      start optimizing.......          |
+---------------------------------------+
EOF

# Basic tools: I/O and file inspection, downloader, time sync client, cron.
yum install iotop lsof wget ntpdate crontabs -y

# Optional: make the 163.com mirror the default yum repo (disabled by default).
#mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
#wget http://mirrors.163.com/.help/CentOS6-Base-163.repo -O /etc/yum.repos.d/CentOS-Base.repo

# Third-party repositories.
# EPEL
rpm -Uvh http://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-6

# RPMforge
rpm -Uvh http://packages.sw.be/rpmforge-release/rpmforge-release-0.5.2-2.el6.rf.x86_64.rpm
rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-rpmforge-dag

# Remi
rpm -Uvh http://rpms.famillecollet.com/enterprise/remi-release-6.rpm

# Update the system: glibc first, then the package tooling, then everything else.
yum clean all
yum -y update glibc\*
yum -y update yum\* rpm\* python\*
yum -y update
yum -y install ntp

# Sync the clock once a day at 04:00. The minute field must be 0: the former
# "* 4 * * *" fired every minute between 04:00 and 04:59. The entry is only
# added when it is not already present, so re-running does not duplicate it.
cron_file=/var/spool/cron/root
cron_job='0 4 * * * /usr/sbin/ntpdate 210.72.145.44 > /dev/null 2>&1'
grep -qF "$cron_job" "$cron_file" 2>/dev/null || echo "$cron_job" >> "$cron_file"
service crond restart

# Raise the open-file limit: append a ulimit command to /etc/rc.local and
# soft/hard nofile entries for all users to limits.conf.
# Both writes use >>, so every run of the script appends them again.
echo "ulimit -SHn 102400" >> /etc/rc.local
cat >> /etc/security/limits.conf << EOF
*           soft   nofile       65535
*           hard   nofile       65535
EOF

# Guard against accidental reboots: comment out the shutdown command in the
# control-alt-delete job definition.
sed -i 's#exec /sbin/shutdown -r now#\#exec /sbin/shutdown -r now#' /etc/init/control-alt-delete.conf

# Disable SELinux: rewrite SELINUX=enforcing to SELINUX=disabled in the config file.
sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config

# SSH: switch GSSAPI authentication off and set UseDNS no (replacing the
# commented-out "#UseDNS yes" line), then restart sshd.
sed -i 's/^GSSAPIAuthentication yes$/GSSAPIAuthentication no/' /etc/ssh/sshd_config
sed -i 's/#UseDNS yes/UseDNS no/' /etc/ssh/sshd_config
service sshd restart

# Tune kernel network parameters: append the block below to /etc/sysctl.conf
# (appended again on every run) and load it.
# NOTE(review): tcp_tw_recycle=1 together with tcp_timestamps=0 looks
# questionable - confirm these values against the kernel documentation
# before using this on a production host.
cat >> /etc/sysctl.conf << EOF
net.ipv4.tcp_fin_timeout = 1
net.ipv4.tcp_keepalive_time = 1200
net.ipv4.tcp_mem = 94500000 915000000 927000000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_timestamps = 0
net.ipv4.tcp_synack_retries = 1
net.ipv4.tcp_syn_retries = 1
net.ipv4.tcp_tw_recycle = 1
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.core.netdev_max_backlog = 262144
net.core.somaxconn = 262144
net.ipv4.tcp_max_orphans = 3276800
net.ipv4.tcp_max_syn_backlog = 262144
net.core.wmem_default = 8388608
net.core.rmem_default = 8388608
EOF
/sbin/sysctl -p

# Optional shell/vim conveniences (disabled by default): make backspace erase
# the previous character, and enable vim syntax highlighting.
#echo 'stty erase ^H' >> /etc/profile
#echo "syntax on" >> /root/.vimrc

# Park the daily makewhatis and mlocate cron jobs in a backup directory.
# -p keeps a second run from failing on the existing directory, and the -f
# test skips jobs that are absent or were already moved.
mkdir -p /etc/cron.daily.bak
for job in makewhatis.cron mlocate.cron; do
    [ -f "/etc/cron.daily/$job" ] && mv "/etc/cron.daily/$job" /etc/cron.daily.bak
done

# Stop unneeded services from starting at boot.
# NOTE(review): this list includes iptables and ip6tables, i.e. the host
# firewall is switched off - confirm that is intended.
for svc in bluetooth cups ip6tables iptables fcoe iscsi iscsid lldpad \
           nfslock nfs rpcbind rpcgssd rpcidmapd; do
    chkconfig "$svc" off
done

# Disable IPv6: module options plus the network sysconfig flag.
cat > /etc/modprobe.d/ipv6.conf << EOFI
alias net-pf-10 off
options ipv6 disable=1
EOFI
echo "NETWORKING_IPV6=off" >> /etc/sysconfig/network
cat << EOF
+-------------------------------------------------+
|               optimizer is done                 |
|   it's recommond to restart this server !       |
+-------------------------------------------------+
EOF

# Set the system time zone to Asia/Shanghai.
cp -f /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

对脚本的内容做一下说明:

  1. 先对系统进行判断,如果是Cent OS 64位,就继续运行。
  2. (可选,脚本中默认已注释)将系统的安装源设置为网易的(网易的安装源算是国内比较稳定的)
  3. 安装epel的源和rpmforge的源,利用第三方的源来让yum安装起来更方便
  4. 更新软件
  5. 设置为每天凌晨四点进行时间同步(跟国家授时中心的服务器进行时间同步)
  6. 将系统同时打开的文件个数增大
  7. 将ctrl alt delete键进行屏蔽,防止误操作的时候服务器重启
  8. 关闭selinux
  9. 禁用GSSAPI来认证,也禁用DNS反向解析,加快SSH登陆速度
  10. 优化一些内核参数
  11. (可选,脚本中默认已注释)调整删除字符的按键为backspace(某些系统默认是delete)
  12. (可选,脚本中默认已注释)打开vim的语法高亮
  13. 取消生成whatis数据库和locate数据库
  14. 关闭没用的服务
  15. 关闭IPv6

安全设置

#!/bin/bash
# CentOS hardening script: locks unused accounts, disables services, tightens
# file permissions and adjusts several files under /etc. Before a file is
# edited it is copied to <file>.<YYYY-MM-DD>.

echo "#####################################"
echo "#####Centos Sytem Security Shell#####"
echo "#####################################"

# Date suffix for the backup copies.
time=`date  "+%Y-%m-%d"`

# active_lines FILE
# Print FILE without comment lines and empty lines.
active_lines() {
    grep -Ev '^#|^$' "$1" 2>/dev/null
}

# append_if_missing FILE REGEX LINE
# Append LINE to FILE unless an active line already matches REGEX (ERE).
# Backslash escapes in LINE (e.g. \t) are expanded.
append_if_missing() {
    if ! active_lines "$1" | grep -Eq -- "$2"; then
        printf '%b\n' "$3" >> "$1"
    fi
}

# replace_or_append FILE KEY LINE
# If an active line contains KEY, rewrite FILE from its active lines with each
# such line replaced by LINE (comments and blank lines are dropped in the
# process); otherwise append LINE.
replace_or_append() {
    if active_lines "$1" | grep -q -- "$2"; then
        active_lines "$1" | awk -v key="$2" -v line="$3" '$0 ~ key {print line; next} {print}' > "$1.tmp"
        mv -f "$1.tmp" "$1"
    else
        echo "$3" >> "$1"
    fi
}

#1#######Lock Useless Users#########
for i in adm lp sync shutdown halt news uucp operator games gopher ftp
do
    usermod -L "$i"
done

#2######Close Useless Services########
for i in nfs postfix ypbind portmap smb netfs lpd snmpd named squid xinetd apmd autofs cups isdn nfslock pcmcia sendmail
do
    chkconfig --level 2345 "$i" off
done

#3#####Directory And File Limit#######
# Make the account databases immutable. To undo:
# chattr -i /etc/passwd /etc/shadow /etc/group /etc/gshadow
chattr +i /etc/passwd /etc/shadow /etc/group /etc/gshadow

chmod -R 700 /etc/rc.d/init.d/*
chmod 644 /var/log/wtmp /var/run/utmp

#4#####Clean System Banner###########
cp /etc/issue /etc/issue.$time
cp /etc/issue.net /etc/issue.net.$time
echo "" > /etc/issue
echo "" > /etc/issue.net

#5####Alter login.defs###############
# Minimum password length 12, maximum password age 90 days.
cp /etc/login.defs /etc/login.defs.$time
replace_or_append /etc/login.defs PASS_MIN_LEN  "PASS_MIN_LEN 12"
replace_or_append /etc/login.defs PASS_MAX_DAYS "PASS_MAX_DAYS 90"

#6####Alter profile###############
# Set the shell TMOUT variable to 300.
cp /etc/profile /etc/profile.$time
replace_or_append /etc/profile TMOUT "TMOUT=300;export TMOUT"

#7####Alter inittab###############
# Comment out the ctrl-alt-del entry. The previous awk program ran `next`
# without printing, which removed the line instead of commenting it out.
cp /etc/inittab /etc/inittab.bak$time
if active_lines /etc/inittab | grep -q 'ca::ctrlaltdel'; then
    active_lines /etc/inittab | awk '/^ca::ctrlaltdel/ {print "#" $0; next} {print}' > /etc/inittab.tmp
    mv -f /etc/inittab.tmp /etc/inittab
else
    echo -e "#ca::ctrlaltdel:/sbin/shutdown\t-t3\t-r\tnow" >> /etc/inittab
fi

#8####Alter syslog###############
# NOTE(review): this assumes the classic syslog daemon (/etc/syslog.conf and
# the "syslog" init script) and an existing /var/adm directory - confirm for
# the target release before relying on it.
cp /etc/syslog.conf /etc/syslog.conf.$time
append_if_missing /etc/syslog.conf '\*\.err'        '*.err        /var/adm/messages'
append_if_missing /etc/syslog.conf '\*\.info'       '*.info        /var/adm/messages'
append_if_missing /etc/syslog.conf '\*\.emerg'      '*.emerg        /var/adm/messages'
append_if_missing /etc/syslog.conf 'local7\.\*'     'local7.*        /var/adm/messages'
append_if_missing /etc/syslog.conf 'kern\.debug'    'kern.debug       /var/adm/messages'
append_if_missing /etc/syslog.conf 'kern\.warning'  'kern.warning       /var/adm/messages'
append_if_missing /etc/syslog.conf 'authpriv\.none' 'authpriv.none       /var/adm/messages'
append_if_missing /etc/syslog.conf 'mail\.none'     'mail.none       /var/adm/messages'
append_if_missing /etc/syslog.conf 'daemon\.notice' 'daemon.notice        /var/adm/messages'
append_if_missing /etc/syslog.conf 'cron\.\*'       'cron.*      /var/log/cron'
/etc/rc.d/init.d/syslog restart

#9####Alter host.conf###############
cp /etc/host.conf /etc/host.conf.$time
echo -e "order\tbind,hosts\nmulti\ton\nnospoof\ton" >/etc/host.conf

#10####Alter limits.conf###############
# NOTE(review): a hard nproc limit of 20 for every user is very tight - confirm.
cp /etc/security/limits.conf /etc/security/limits.conf.$time
append_if_missing /etc/security/limits.conf '\*[[:space:]]+soft[[:space:]]+core[[:space:]]+0'   '*\tsoft\tcore\t0'
append_if_missing /etc/security/limits.conf '\*[[:space:]]+hard[[:space:]]+core[[:space:]]+0'   '*\thard\tcore\t0'
append_if_missing /etc/security/limits.conf '\*[[:space:]]+hard[[:space:]]+rss[[:space:]]+5000' '*\thard\trss\t5000'
append_if_missing /etc/security/limits.conf '\*[[:space:]]+hard[[:space:]]+nproc[[:space:]]+20' '*\thard\tnproc\t20'

#11####Alter pam_login###############
# Reference the module by name so PAM looks it up in its own module
# directory; the hard-coded /lib/security path does not exist on 64-bit
# installs, and a wrong module path in this file breaks console login.
cp /etc/pam.d/login /etc/pam.d/login.$time
append_if_missing /etc/pam.d/login \
    '^session[[:space:]]+required[[:space:]]+(/lib(64)?/security/)?pam_limits\.so' \
    'session\trequired\tpam_limits.so'

#12####Alter sshd_config###############
cp /etc/ssh/sshd_config /etc/ssh/sshd_config.$time
append_if_missing /etc/ssh/sshd_config '^Protocol[[:space:]]+2' 'Protocol 2'

#13####Alter sysctl.conf###############
cp /etc/sysctl.conf /etc/sysctl.conf.$time
append_if_missing /etc/sysctl.conf 'net\.ipv4\.tcp_max_syn_backlog'               'net.ipv4.tcp_max_syn_backlog=4096'
# Key corrected from "net.ipv4.conf_all.rp_filter", which is not a valid sysctl name.
append_if_missing /etc/sysctl.conf 'net\.ipv4\.conf\.all\.rp_filter'              'net.ipv4.conf.all.rp_filter=1'
append_if_missing /etc/sysctl.conf 'net\.ipv4\.tcp_syncookies'                    'net.ipv4.tcp_syncookies=1'
append_if_missing /etc/sysctl.conf 'net\.ipv4\.conf\.all\.send_redirects'         'net.ipv4.conf.all.send_redirects=0'
append_if_missing /etc/sysctl.conf 'net\.ipv4\.conf\.all\.accept_redirects'       'net.ipv4.conf.all.accept_redirects=0'
append_if_missing /etc/sysctl.conf 'net\.ipv4\.ip_forward'                        'net.ipv4.ip_forward=0'
append_if_missing /etc/sysctl.conf 'net\.ipv4\.conf\.all\.accept_source_route'    'net.ipv4.conf.all.accept_source_route=0'
append_if_missing /etc/sysctl.conf 'net\.ipv4\.conf\.default\.accept_redirects'   'net.ipv4.conf.default.accept_redirects=0'
append_if_missing /etc/sysctl.conf 'net\.ipv4\.conf\.default\.send_redirects'     'net.ipv4.conf.default.send_redirects=0'
append_if_missing /etc/sysctl.conf 'net\.ipv4\.icmp_echo_ignore_broadcasts'       'net.ipv4.icmp_echo_ignore_broadcasts=1'
sysctl -p /etc/sysctl.conf
chown root:root /etc/sysctl.conf
chmod 600 /etc/sysctl.conf

脚本转自:

http://www.linuxde.net/2011/12/5756.html http://www.linuxpad.cn/linux/centos-sytem-security-shell.html