参考了其他人的思路,总结了下主要收集有五个方面:
1、index.php源代码中特征内容判断
2、静态目录中的静态资源文件,如图片等
3、根目录robots.txt 文本中的特征判断
4、根目录favicon.ico MD5的特征判断
5、http响应包中的报文特征内容判断
参考langzi的代码
# coding=utf-8
import ast
import hashlib
import json
import os
from threading import Thread

import requests
class CmsScan:
    """Fingerprint a web application's CMS via several independent probes.

    Each ``*_scan`` method checks one signal source — index page body,
    response headers, ``robots.txt``, characteristic sub-paths, and known
    file MD5s — and prints the CMS name on the first match.  The methods
    share no mutable state, so they are safe to run on separate threads.
    """

    def __init__(self, dest_url):
        """Load the fingerprint databases and remember the target URL.

        :param dest_url: base URL of the site to scan (http/https).
        :raises OSError: if a data file next to this script is missing.
        """
        self.headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"
        }
        self.dest_url = dest_url
        # FIX: the original used ``__file__[0:-6]``, which only yields the
        # script's directory when the filename is exactly six characters.
        base = os.path.dirname(os.path.abspath(__file__))

        def _load(name):
            # FIX: ast.literal_eval replaces eval() — identical result for
            # literal dict/list data files, but cannot execute code if a
            # data file is tampered with.
            with open(os.path.join(base, name), 'r', encoding='utf-8') as fh:
                return ast.literal_eval(fh.read())

        self.body_content = _load('body.txt')      # {keyword-in-body: cms}
        self.head_content = _load('head.txt')      # {keyword-in-headers: cms}
        self.robots_content = _load('robots.txt')  # iterable of robots.txt signatures
        self.rule_content = _load('cms_rule.txt')  # lines of "path|cms|md5"
        with open(os.path.join(base, 'data.json'), 'r', encoding='utf-8') as fh:
            # presumably a list of {'url':..., 'name':..., 're':..., 'md5':...}
            # entries — matches the keys read in sub_dir_content_scan.
            self.data_content = json.load(fh)

    @staticmethod
    def _md5_hex(data):
        """Return the hexadecimal MD5 digest of *data* (bytes)."""
        return hashlib.md5(data).hexdigest()

    def _base_url(self, trailing_slash):
        """Return ``dest_url`` normalized with or without a trailing slash.

        FIX: computed locally instead of mutating ``self.dest_url`` — the
        original's in-place rewrites raced when the scan methods ran on
        concurrent threads (see ``__main__``).
        """
        url = self.dest_url
        if trailing_slash:
            return url if url.endswith('/') else url + '/'
        return url[:-1] if url.endswith('/') else url

    def header_index_content_scan(self):
        """Match fingerprints against the index page body and headers."""
        try:
            resp = requests.get(url=self.dest_url, headers=self.headers,
                                allow_redirects=False, timeout=3, verify=False)
        except Exception:
            return  # best-effort: an unreachable host yields no signal
        # errors='replace' keeps non-UTF-8 bodies from raising.
        body = resp.content.decode('utf-8', errors='replace')
        for keyword, cms in self.body_content.items():
            if keyword in body:
                print("[header_index_content_scan]: cms识别为 " + cms)
                return
        # FIX: the original crashed here — it referenced ``self.head``
        # (the attribute is ``head_content``) and called the Python 2
        # ``iteritems()``.
        for keyword, cms in self.head_content.items():
            if keyword in resp.headers:
                print("[scan01_headers]: cms识别为 " + cms)
                return

    def robot_scan(self):
        """Match fingerprints against the site's ``/robots.txt``."""
        url = self._base_url(trailing_slash=True) + 'robots.txt'
        try:
            resp = requests.get(url=url, headers=self.headers,
                                allow_redirects=False, timeout=3, verify=False)
        except Exception:
            return
        text = resp.content.decode('utf-8', errors='replace')
        for signature in self.robots_content:
            if signature in text:
                # NOTE(review): prints the matched signature itself as the
                # CMS name, mirroring the original — confirm the robots.txt
                # data file uses CMS names as its signatures.
                print("[robots_scan]: cms识别为 " + signature)
                return

    def sub_dir_content_scan(self):
        """Probe characteristic CMS sub-paths (substring or MD5 match)."""
        base = self._base_url(trailing_slash=False)
        for rule in self.data_content:
            try:
                # FIX: the original fetched every matching URL twice; one
                # response serves both the substring and the MD5 check.  The
                # request is also now inside the try, so one dead path no
                # longer aborts the whole scan.
                resp = requests.get(url=base + rule['url'], headers=self.headers,
                                    allow_redirects=False, timeout=3, verify=False)
                if resp.status_code != 200:
                    continue
                if rule['md5'] == '':
                    # No MD5 recorded: fall back to a body substring match.
                    if rule['re'] in resp.content.decode('utf-8', errors='replace'):
                        print(rule['name'])
                        return
                elif self._md5_hex(resp.content) == rule['md5']:
                    print(rule['name'])
                    return
            except Exception:
                continue  # best-effort: skip unreachable/undecodable paths

    def md5_scan(self):
        """Match known file MD5s (e.g. ``favicon.ico``) from the rule file."""
        base = self._base_url(trailing_slash=False)
        for line in self.rule_content:
            # Rule format: "path|cms_name|md5".
            cms_dir, cms_type, cms_md5 = line.split('|')[:3]
            try:
                resp = requests.get(url=base + cms_dir, headers=self.headers,
                                    allow_redirects=False, timeout=3, verify=False)
            except Exception:
                continue
            # FIX: dropped the original's stray debug ``print(rmd5)``.
            if self._md5_hex(resp.content) == cms_md5:
                print(cms_type)
                return
if __name__ == '__main__':
    scanner = CmsScan('http://news.eeeqi.cn/')
    # Run all four detection techniques concurrently; each prints its own
    # result on the first fingerprint match.
    workers = [
        Thread(target=task)
        for task in (
            scanner.header_index_content_scan,
            scanner.robot_scan,
            scanner.sub_dir_content_scan,
            scanner.md5_scan,
        )
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()