# Download the nginx 1.14.0 source tarball, unpack it, and enter the source tree.
# The steps are chained with && so a failed download or extraction stops the
# sequence instead of running the following commands against missing files.
wget https://nginx.org/download/nginx-1.14.0.tar.gz &&
  tar -zxvf nginx-1.14.0.tar.gz &&
  cd nginx-1.14.0
安装依赖库:
# Install the compiler and the pcre / zlib / openssl development packages.
# Every command passes -y so yum does not stop for an interactive confirmation.
yum install -y gcc-c++
yum install -y pcre pcre-devel
yum install -y zlib zlib-devel
yum install -y openssl openssl-devel
编译安装:
# Configure with the default options, build, then install; each step runs only
# if the previous one succeeded. With no --prefix the install goes to
# /usr/local/nginx (binary at /usr/local/nginx/sbin/nginx).
# NOTE(review): HTTPS support is not built by default; add
# --with-http_ssl_module to ./configure if the "listen 443 ssl" server is needed.
./configure && make && make install
nginx命令:
start nginx(Windows 下的启动方式;Linux 下直接执行 nginx 即可启动)
nginx -s stop(快速关闭;如需等待当前请求处理完毕后再优雅关闭,请使用 nginx -s quit)
nginx -s reload(重新加载配置文件,平滑重启 worker 进程,不中断服务)
nginx负载均衡配置:
# nginx.conf: reverse-proxy / load-balancing example.
# Layout: main (global) context -> events -> http -> server -> location.

#user  nobody;                          # main (global) context
worker_processes  1;

#error_log  logs/error.log;
#error_log  logs/error.log  notice;
#error_log  logs/error.log  info;

#pid        logs/nginx.pid;


events {                                # events context
    worker_connections  1024;
}


http {                                  # http context
    include       mime.types;           # http-level (global) directives
    default_type  application/octet-stream;

    #log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
    #                  '$status $body_bytes_sent "$http_referer" '
    #                  '"$http_user_agent" "$http_x_forwarded_for"';

    #access_log  logs/access.log  main;

    sendfile        on;
    #tcp_nopush     on;

    #keepalive_timeout  0;
    keepalive_timeout  65;

    #gzip  on;

    upstream myserver.com {             # load-balancing pool
        # A weight can be added to each server to control its share of the
        # traffic, e.g.: server 127.0.0.1:8080 weight=1;
        server 127.0.0.1:8080;
    }

    server {                            # server context
        # Anti-crawler rules (server-level directives)
        include anti_spider.conf;       # load the anti-crawler configuration

        listen       80;
        server_name  211.67.160.21;

        #charset koi8-r;

        #access_log  logs/host.access.log  main;

        # Proxy every request to the upstream pool.
        # NOTE(review): regex locations are tried in order of appearance and the
        # first match wins, so this catch-all shadows every regex location
        # declared further down (static files, /static/, fastcgi). Only the
        # exact-match "= /50x.html" location still takes precedence.
        location ~* ^.+$ {              # location context
            proxy_pass http://myserver.com;
            allow all;
        }

        error_page  404              https://www.baidu.com;

        #error_page  404              /404.html;

        # redirect server error pages to the static page /50x.html
        #
        error_page   500 502 503 504  /50x.html;
        location = /50x.html {
            root   html;
        }

        # proxy the PHP scripts to Apache listening on 127.0.0.1:80
        #
        #location ~ \.php$ {
        #    proxy_pass   http://127.0.0.1;
        #}

        # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
        #
        #location ~ \.php$ {
        #    root           html;
        #    fastcgi_pass   127.0.0.1:9000;
        #    fastcgi_index  index.php;
        #    fastcgi_param  SCRIPT_FILENAME  /scripts$fastcgi_script_name;
        #    include        fastcgi_params;
        #}

        # deny access to .htaccess files, if Apache's document root
        # concurs with nginx's one
        #
        #location ~ /\.ht {
        #    deny  all;
        #}

        # Static assets, cached by the client for 30 days.
        # The dot before the extension list is escaped so it matches a literal ".".
        # NOTE(review): D:/register is a Windows path; adjust it on Linux hosts.
        location ~* ^.+\.(html|jpg|jpeg|gif|png|ico|css|js)$
        {
            root D:/register;
            expires 30d;
            break;
        }

        location ~ ^/static/ {
            root D:/register;
            expires 30d;
            break;
        }

        # NOTE(review): fastcgi_pass targets port 80, the port this server
        # itself listens on - confirm the intended FastCGI backend address.
        location ~ ^/ {
            fastcgi_pass 127.0.0.1:80;
            fastcgi_param PATH_INFO $fastcgi_script_name;
            fastcgi_param REQUEST_METHOD $request_method;
            fastcgi_param QUERY_STRING $query_string;
            fastcgi_param CONTENT_TYPE $content_type;
            fastcgi_param CONTENT_LENGTH $content_length;
            fastcgi_param SERVER_PROTOCOL $server_protocol;
            fastcgi_param SERVER_PORT $server_port;
            fastcgi_param SERVER_NAME $server_name;
            fastcgi_pass_header Authorization;
            fastcgi_intercept_errors off;
        }
    }


    # another virtual host using mix of IP-, name-, and port-based configuration
    #
    #server {
    #    listen       8000;
    #    listen       somename:8080;
    #    server_name  somename  alias  another.alias;

    #    location / {
    #        root   html;
    #        index  index.html index.htm;
    #    }
    #}


    # HTTPS server
    #
    #server {
    #    listen       443 ssl;
    #    server_name  localhost;

    #    ssl_certificate      cert.pem;
    #    ssl_certificate_key  cert.key;

    #    ssl_session_cache    shared:SSL:1m;
    #    ssl_session_timeout  5m;

    #    ssl_ciphers  HIGH:!aNULL:!MD5;
    #    ssl_prefer_server_ciphers  on;

    #    location / {
    #        root   html;
    #        index  index.html index.htm;
    #    }
    #}

}
- 1、全局块:配置影响nginx全局的指令。一般有运行nginx服务器的用户组,nginx进程pid存放路径,日志存放路径,配置文件引入,允许生成worker process数等。
- 2、events块:配置影响nginx服务器与用户之间网络连接的指令。包括每个进程的最大连接数,选取哪种事件驱动模型处理连接请求,是否允许同时接受多个网络连接,是否开启多个网络连接的序列化等。
- 3、http块:可以嵌套多个server,配置代理,缓存,日志定义等绝大多数功能和第三方模块的配置。如文件引入,mime-type定义,日志自定义,是否使用sendfile传输文件,连接超时时间,单连接请求数等。
- 4、server块:配置虚拟主机的相关参数,一个http中可以有多个server。
- 5、location块:配置请求的路由,以及各种页面的处理情况。
nginx反爬虫:
# anti_spider.conf: included at server level to reject crawlers and scanners.

# Block scraping tools such as Scrapy (case-insensitive match)
if ($http_user_agent ~* (Scrapy|Curl|HttpClient)) {
    return 403;
}

# Block the listed user agents, and requests with an empty user agent (^$)
if ($http_user_agent ~ "WinHttp|WebZIP|FetchURL|node-superagent|java/|FeedDemon|Jullo|JikeSpider|Indy Library|Alexa Toolbar|AskTbFXTV|AhrefsBot|CrawlDaddy|Java|Feedly|Apache-HttpAsyncClient|UniversalFeedParser|ApacheBench|Microsoft URL Control|Swiftbot|ZmEu|oBot|jaunty|Python-urllib|lightDeckReports Bot|YYSpider|DigExt|HttpClient|MJ12bot|heritrix|EasouSpider|Ezooms|BOT/0.1|YandexBot|FlightDeckReports|Linguee Bot|^$" ) {
    return 403;
}

# Reject any request method other than GET, HEAD or POST
if ($request_method !~ ^(GET|HEAD|POST)$) {
    return 403;
}

# Block a single IP address:
#deny 123.45.6.7;
# Block a whole /8 range, i.e. 123.0.0.1 through 123.255.255.254:
#deny 123.0.0.0/8;
# Block a /16 range, i.e. 123.45.0.1 through 123.45.255.254:
#deny 123.45.0.0/16;
# Block a /24 range, i.e. 123.45.6.1 through 123.45.6.254:
#deny 123.45.6.0/24;

# The addresses below are known abusive sources
#deny 58.95.66.0/24;
常见垃圾UA列表:
> FeedDemon 内容采集
> BOT/0.1 (BOT for JCE) sql注入
> CrawlDaddy sql注入
> Java 内容采集
> Jullo 内容采集
> Feedly 内容采集
> UniversalFeedParser 内容采集
> ApacheBench cc攻击器
> Swiftbot 无用爬虫
> YandexBot 无用爬虫
> AhrefsBot 无用爬虫
> YisouSpider 无用爬虫
> JikeSpider 无用爬虫
> MJ12bot 无用爬虫
> ZmEu phpmyadmin 漏洞扫描
> WinHttp 采集cc攻击
> EasouSpider 无用爬虫
> HttpClient tcp攻击
> Microsoft URL Control 扫描
> YYSpider 无用爬虫
> jaunty wordpress爆破扫描器
> oBot 无用爬虫
> Python-urllib 内容采集
> Indy Library 扫描
> FlightDeckReports Bot 无用爬虫
> Linguee Bot 无用爬虫