菜单
本页目录

介绍

SiteInfoScan 是一款 URL 详细信息扫描工具:输入单个 URL 或包含多个 URL 的文件(一行一个),可选择是否查询 ICP 备案单位和 CMS 信息,并将扫描结果输出到 Excel 中。输出字段:响应码、URL、Host、归属单位、单位性质、标题、敏感内容、关键词、页面描述、响应长度、站点服务、CMS、IPv4、IP归属国、IP归属省、IP归属市、IP归属区县、IP归属运营商。

脚本参数

usage: SiteInfoDetial.py [-h] [-u URL] [-f FILE] [-c CMS] [-i ICP] [-t THREAD]

options:
  -h, --help            show this help message and exit
  -u URL, --url URL     单个URL检测
  -f FILE, --file FILE  指定url文件(一行一个)
  -c CMS, --cms CMS     是否检测CMS,如果目标多的话,比较耗时(bool: false)
  -i ICP, --icp ICP     是否返回ICP,如果目标多的话,比较耗时(bool: false)
  -t THREAD, --thread THREAD
                        并发数(int: 5)

输出日志

[ https://www.baidu.com ] [200] 百度一下,你就知道
[ output: ] output\SiteInfoDetialScan_1734681113\result.xlsx

代码源码

# -*- coding: utf-8 -*-
from secScript import analyze_url, Function, outPath, log, program_exit, icp, dns, ip
import openpyxl, argparse, os.path, time
import concurrent.futures
from urllib.parse import urlparse

# Accumulated result rows: output() appends one row per scanned URL and
# outExcel() iterates over it when writing the workbook.
result = []
# Name prefix used by outExcel() when building the output directory name.
model = 'SiteInfoDetialScan'


# Record one result row and echo it to the log
def output(status, link, root, unit, unit_type, title, an_lian, keywords, description, length, server: str,
           cms: list | str, s_ip, g, s, sh, q, y) -> list:
    """Build a result row, append it to the module-level ``result`` list and log it.

    The row keeps the argument order: status, link, root, unit, unit_type,
    title, an_lian, keywords, description, length, server, cms, s_ip, g, s,
    sh, q, y.

    :param cms: CMS name(s); a ``list`` is joined with commas, and any falsy
        value is stored as an empty string.
    :return: the row that was appended to ``result``.
    """
    # Only an exact ``list`` is joined; every other value is passed through.
    cms_text = ",".join(cms) if type(cms) is list else cms
    if not cms_text:
        cms_text = ""
    row = [
        status, link, root, unit, unit_type, title, an_lian, keywords, description, length, server, cms_text,
        s_ip, g, s, sh, q, y,
    ]
    result.append(row)
    log(link, f"[{status}] {title}")
    return row


# Replace illegal (control) characters
def replace_non_printable(s):
    """Return *s* with every ASCII control character replaced by a space.

    Control characters are the code points ``\\x00``-``\\x1F`` and ``\\x7F``.

    :param s: text to clean; ``None`` is accepted and treated as empty text so
        that a missing field does not raise.
    :return: the cleaned string (``""`` when *s* is ``None``).
    """
    if s is None:
        return ''
    return ''.join(
        ' ' if ord(char) < 32 or ord(char) == 127 else char
        for char in s
    )


# Main detection routine
def run(_url: str, cms_: bool = False, icp_: bool = False) -> list | None:
    """Collect site details for one URL and record them through ``output``.

    Steps: analyse the URL (following one reported redirect), optionally look
    up the ICP registration of the root domain, resolve the host, look up the
    location of the first resolved IP, then hand everything to ``output``.

    :param _url: URL to scan.
    :param cms_: forwarded to ``analyze_url`` to enable CMS detection.
    :param icp_: when true, query ICP registration data for the root domain.
    :return: the row produced by ``output``, or ``None`` when ``analyze_url``
        returns nothing for the URL (or for its redirect target).
    """
    # Imported locally so this function does not depend on the file header.
    from urllib.parse import urljoin

    res = analyze_url(_url, cms_)
    if not res:
        return None

    redirect = res.get("redirect")
    if redirect:
        log(_url, "【Redirect To】 {}".format(redirect))
        if redirect.startswith('http'):
            target = redirect
        elif _url.startswith(('http://', 'https://')):
            # Resolve relative redirects against the original URL instead of
            # appending them to it (handles "/path", "path" and "//host/path").
            target = urljoin(_url, redirect)
        else:
            # No usable base scheme: keep the plain concatenation.
            target = f"{_url}{redirect}"
        res = analyze_url(target, cms_)
        if not res:
            return None

    # Root domain
    root_d = Function.getRootDomain(_url)
    # Host name
    host = urlparse(_url).hostname
    # Root domain, or the host (e.g. an IP address) when there is none
    root_ = root_d if root_d else host

    # Owning organisation and its nature, filled from the ICP record
    unit = ''
    unit_type = ''
    if icp_:
        _icp = icp(root_)
        if _icp:
            try:
                unit = _icp.get("data")["list"][0]["unitName"]
                unit_type = _icp.get("data")["list"][0]["natureName"]
            except (AttributeError, TypeError, KeyError, IndexError):
                # Best effort: an unexpected or empty ICP payload leaves the
                # fields at whatever was filled in so far.
                pass

    _dns = dns(host)
    if _dns is None: print(_dns)
    # NOTE(review): 'ip' is indexed like a sequence of addresses - confirm
    # against dns(). A missing/None value no longer raises here.
    resolved = _dns.get('ip') if _dns else None
    s_ip = resolved[0] if resolved else ''

    g, s, sh, q, y = '', '', '', '', ''
    # No DNS answer and no root domain: fall back to using the host itself.
    if _dns is None and s_ip == "" and root_d == "": s_ip = host
    if "" == s_ip: print(_url, root_d == "", host)
    if s_ip:
        d = ip(s_ip)
        try:
            g = d.get("Country")
            s = d.get("Province")
            # NOTE(review): lower-case "city" differs from the other keys -
            # confirm against the ip() result.
            sh = d.get("city")
            q = d.get("County")
            y = d.get("Operator")
        except (AttributeError, TypeError):
            # Best effort: ip() returned nothing usable.
            pass

    cms = ','.join(res.get('CMS')) if res.get('CMS') else ''
    anlian = ','.join(res.get("anlian")) if res.get("anlian") else ''
    # A missing title is treated as empty instead of raising AttributeError.
    title = (res.get('title') or '').strip()
    return output(
        res.get("status"), res.get("link"), host,
        unit, unit_type,
        title, anlian, res.get("keywords"), res.get("description"),
        res.get('length'), res.get('server'),
        cms, s_ip, g, s, sh, q, y
    )


# Write the results to an Excel workbook
def outExcel(path: str) -> str:
    """Write every row of the module-level ``result`` list to ``result.xlsx``.

    The workbook is saved in the directory ``<outPath>/<model>_<path>``, which
    is created when missing.

    :param path: suffix appended to the ``model`` prefix to name the directory.
    :return: path of the saved workbook.
    """
    path = os.path.join(outPath, "{}_{}".format(model, path))
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    worksheet.append([
        "响应码", "URL", "Host", "归属单位", "单位性质", "标题", "敏感内容", "关键词", "页面描述", "响应长度",
        "站点服务", "CMS", "IPv4", "IP归属国", "IP归属省", "IP归属市", "IP归属区县", "IP归属运营商"
    ])
    # Columns holding free text (title, keywords, description) that is cleaned
    # of control characters before being written.
    text_columns = (5, 7, 8)
    for i in result:
        # Skip entries with no data (None or an empty row).
        if not i: continue
        try:
            row = [i[col] for col in range(18)]
            for col in text_columns:
                # A missing (None) field is written as an empty cell instead
                # of raising and losing the whole row.
                if row[col] is not None:
                    row[col] = replace_non_printable(row[col])
            worksheet.append(row)
        except Exception as e:
            # Best effort: report the bad row and keep writing the others.
            print("err", e)
    file = os.path.join(path, "result.xlsx")
    workbook.save(file)
    log("output:", file)
    return file


if __name__ == '__main__':
    def _str2bool(value: str) -> bool:
        """Parse a command-line boolean.

        ``type=bool`` treats every non-empty string, including "false", as
        True. Here the usual negative spellings are False and anything else
        is True.
        """
        return value.strip().lower() not in ('', 'false', '0', 'no', 'n', 'off')

    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--url', help='单个URL检测')
    parser.add_argument('-f', '--file', help='指定url文件(一行一个)')
    parser.add_argument('-c', '--cms', help='是否检测CMS,如果目标多的话,比较耗时(bool: false)', default=False,
                        type=_str2bool)
    parser.add_argument('-i', '--icp', help='是否返回ICP,如果目标多的话,比较耗时(bool: false)', default=False,
                        type=_str2bool)
    parser.add_argument('-t', '--thread', help='并发数(int: 5)', default=5, type=int)
    args = parser.parse_args()
    # At least one worker is required.
    if args.thread < 1: args.thread = 1
    if args.url:
        # Single URL: run() records its row in ``result`` through output().
        run(args.url, args.cms, args.icp)
    elif args.file:
        try:
            with concurrent.futures.ProcessPoolExecutor(max_workers=args.thread) as executor:
                futures = [
                    (i, executor.submit(run, i, args.cms, args.icp))
                    for i in Function.fileGetUrl(args.file)
                ]
                # Rows are collected from the futures' return values.
                for target, future in futures:
                    try:
                        result.append(future.result())
                    except Exception as e:
                        # One failing URL must not discard the other results.
                        print("err", target, e)
        except KeyboardInterrupt:
            program_exit()
    else:
        parser.print_help()
    if len(result) > 0: outExcel(str(int(time.time())))