介绍
SiteInfoScan
是一款 URL 详细信息扫描工具:输入单个 URL 或 URL 列表文件(一行一个),可选择是否查询 ICP 备案单位和 CMS 信息,并将扫描结果输出到 Excel。输出字段:响应码、URL、Host、归属单位、单位性质、标题、敏感内容、关键词、页面描述、响应长度、站点服务、CMS、IPv4、IP归属国、IP归属省、IP归属市、IP归属区县、IP归属运营商。
脚本参数
usage: SiteInfoDetial.py [-h] [-u URL] [-f FILE] [-c CMS] [-i ICP] [-t THREAD]
options:
-h, --help show this help message and exit
-u URL, --url URL 单个URL检测
-f FILE, --file FILE 指定url文件(一行一个)
-c CMS, --cms CMS 是否检测CMS,如果目标多的话,比较耗时(bool: false)
-i ICP, --icp ICP 是否返回ICP,如果目标多的话,比较耗时(bool: false)
-t THREAD, --thread THREAD
并发数(int: 5)
输出日志
[ https://www.baidu.com ] [200] 百度一下,你就知道
[ output: ] output\SiteInfoDetialScan_1734681113\result.xlsx
代码源码
# -*- coding: utf-8 -*-
from secScript import analyze_url, Function, outPath, log, program_exit, icp, dns, ip
import openpyxl, argparse, os.path, time
import concurrent.futures
from urllib.parse import urlparse
# Result rows collected during a scan: output() appends one row per scanned URL,
# the file-mode branch of the script replaces the list with the worker results,
# and outExcel() writes the rows to the workbook.
result = []
# Name prefix of the per-run output directory that outExcel() creates under outPath.
model = 'SiteInfoDetialScan'
# Record one result row and log it
def output(status, link, root, unit, unit_type, title, an_lian, keywords, description, length, server: str,
           cms: list | str, s_ip, g, s, sh, q, y) -> list:
    """Build one result row, store it in the module-level ``result`` list and log it.

    The row keeps the argument order: status, link, root, unit, unit_type,
    title, an_lian, keywords, description, length, server, cms, s_ip, g, s,
    sh, q, y. A ``cms`` list is joined with commas; any other falsy ``cms``
    value becomes an empty string.

    Returns:
        The row that was appended to ``result``.
    """
    if type(cms) is list:
        cms_text = ",".join(cms)
    else:
        cms_text = cms
    if not cms_text:
        cms_text = ""

    row = [
        status, link, root, unit, unit_type, title, an_lian, keywords, description, length, server, cms_text,
        s_ip, g, s, sh, q, y,
    ]
    # Appending only mutates the shared list, so no ``global`` declaration is required.
    result.append(row)
    log(link, "[{}] {}".format(status, title))
    return row
# Replace ASCII control characters
def replace_non_printable(s):
    """Return *s* with every ASCII control character replaced by a space.

    The replaced range is ``\\x00``-``\\x1F`` plus ``\\x7F`` (DEL); this includes
    tab, line feed and carriage return. All other characters are kept as-is.

    Args:
        s: The text to clean. ``None`` is accepted and yields ``''`` so that a
            missing field does not raise ``TypeError`` in the caller.

    Returns:
        The cleaned string.
    """
    if s is None:
        return ''
    return ''.join(' ' if ord(char) < 32 or ord(char) == 127 else char for char in s)
# Main scan routine
def _join_values(value) -> str:
    """Return *value* as a comma-separated string ('' when it is empty or None)."""
    if not value:
        return ''
    if isinstance(value, str):
        # Joining a plain string would put a comma between every character.
        return value
    return ','.join(map(str, value))


def _icp_owner(domain) -> tuple:
    """Return ``(unit, unit_type)`` taken from the first ICP record of *domain*."""
    unit, unit_type = '', ''
    record = icp(domain)
    if record:
        try:
            entry = record.get("data")["list"][0]
            unit = entry["unitName"]
            unit_type = entry["natureName"]
        except (AttributeError, KeyError, IndexError, TypeError):
            # Best effort: a missing or differently shaped ICP answer leaves the fields empty.
            pass
    return unit, unit_type


def _first_ip(dns_record) -> str:
    """Return the first address stored under ``'ip'`` in *dns_record*, or ''."""
    if not dns_record:
        return ''
    addresses = dns_record.get('ip')
    if not addresses:
        return ''
    if isinstance(addresses, str):
        # NOTE(review): 'ip' is presumably a list; a bare string is kept whole
        # instead of being cut down to its first character.
        return addresses
    return addresses[0]


def _ip_location(address) -> tuple:
    """Return ``(country, province, city, county, operator)`` for *address*."""
    info = ip(address)
    try:
        # NOTE(review): "city" is lower-case while the other keys are capitalised;
        # kept exactly as the original lookup — confirm against what ip() returns.
        return tuple(info.get(key) for key in ("Country", "Province", "city", "County", "Operator"))
    except AttributeError:
        # ip() returned nothing usable (e.g. None): leave the location empty.
        return '', '', '', '', ''


def run(_url: str, cms_: bool = False, icp_: bool = False) -> list | None:
    """Scan one URL and record its details through ``output()``.

    Args:
        _url: The URL to analyse.
        cms_: Forwarded to ``analyze_url`` (CMS detection switch).
        icp_: When true, query ``icp()`` for the owning unit and its nature.

    Returns:
        The row returned by ``output()``, or ``None`` when ``analyze_url``
        yields nothing for the URL or for its redirect target.
    """
    from urllib.parse import urljoin  # local import: only needed for relative redirects

    res = analyze_url(_url, cms_)
    if not res:
        return None

    redirect = res.get("redirect")
    if redirect:
        log(_url, "【Redirect To】 {}".format(redirect))
        # Resolve relative targets against the original URL instead of concatenating,
        # which produced broken URLs when _url had a path or a trailing slash.
        target = redirect if redirect.startswith('http') else urljoin(_url, redirect)
        res = analyze_url(target, cms_)
        if not res:
            return None

    # Host and root domain are derived from the URL as given, not from the redirect target.
    root_d = Function.getRootDomain(_url)
    host = urlparse(_url).hostname
    # Fall back to the host when no root domain is returned.
    root_ = root_d if root_d else host

    unit, unit_type = _icp_owner(root_) if icp_ else ('', '')

    _dns = dns(host)
    s_ip = _first_ip(_dns)
    # No DNS answer and no root domain: use the host itself as the address.
    if _dns is None and s_ip == "" and root_d == "": s_ip = host
    if "" == s_ip: print(_url, root_d == "", host)

    g, s, sh, q, y = _ip_location(s_ip) if s_ip else ('', '', '', '', '')

    cms = _join_values(res.get('CMS'))
    anlian = _join_values(res.get("anlian"))
    # A missing title must not abort the scan of this URL.
    title = (res.get('title') or '').strip()
    return output(
        res.get("status"), res.get("link"), host,
        unit, unit_type,
        title, anlian, res.get("keywords"), res.get("description"),
        res.get('length'), res.get('server'),
        cms, s_ip, g, s, sh, q, y
    )
# Write the collected rows to an Excel workbook
def outExcel(path: str) -> str:
    """Save every row of the module-level ``result`` list to ``result.xlsx``.

    The workbook is written to ``<outPath>/<model>_<path>/result.xlsx``; the
    directory is created when it does not exist yet.

    Args:
        path: Suffix of the output directory name (the script passes a timestamp).

    Returns:
        The path of the saved workbook.
    """
    out_dir = os.path.join(outPath, "{}_{}".format(model, path))
    # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
    os.makedirs(out_dir, exist_ok=True)
    workbook = openpyxl.Workbook()
    worksheet = workbook.active
    worksheet.append([
        "响应码", "URL", "Host", "归属单位", "单位性质", "标题", "敏感内容", "关键词", "页面描述", "响应长度",
        "站点服务", "CMS", "IPv4", "IP归属国", "IP归属省", "IP归属市", "IP归属区县", "IP归属运营商"
    ])
    # Columns holding page text: title, keywords, description.
    text_columns = (5, 7, 8)
    for row in result:
        if not row:
            continue
        try:
            cells = [row[index] for index in range(18)]
            for index in text_columns:
                # Only strings are sanitised; a missing (None) field stays an empty
                # cell instead of raising and dropping the whole row.
                if isinstance(cells[index], str):
                    cells[index] = replace_non_printable(cells[index])
            worksheet.append(cells)
        except Exception as e:
            # Per-row boundary: report the bad row and keep writing the others.
            print("err", e)
    file = os.path.join(out_dir, "result.xlsx")
    workbook.save(file)
    log("output:", file)
    return file
def str_to_bool(value) -> bool:
    """Parse a command-line boolean such as ``true``/``false``, ``1``/``0``, ``yes``/``no``.

    ``argparse`` with ``type=bool`` turns every non-empty string, including
    ``"false"``, into ``True``; this parser reads the text instead.

    Raises:
        argparse.ArgumentTypeError: If *value* is not a recognised boolean word.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if text in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))


# Command-line entry point: scan one URL or every URL of a file, then write the Excel report.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--url', help='单个URL检测')
    parser.add_argument('-f', '--file', help='指定url文件(一行一个)')
    parser.add_argument('-c', '--cms', help='是否检测CMS,如果目标多的话,比较耗时(bool: false)', default=False,
                        type=str_to_bool)
    parser.add_argument('-i', '--icp', help='是否返回ICP,如果目标多的话,比较耗时(bool: false)', default=False,
                        type=str_to_bool)
    parser.add_argument('-t', '--thread', help='并发数(int: 5)', default=5, type=int)
    args = parser.parse_args()
    # At least one worker is required.
    args.thread = max(args.thread, 1)
    if args.url:
        # Single-URL mode: run() records its row in ``result`` through output().
        run(args.url, args.cms, args.icp)
    elif args.file:
        try:
            with concurrent.futures.ProcessPoolExecutor(max_workers=args.thread) as executor:
                jobs = [(i, executor.submit(run, i, args.cms, args.icp)) for i in Function.fileGetUrl(args.file)]
                rows = []
                for target, future in jobs:
                    try:
                        rows.append(future.result())
                    except Exception as e:
                        # One failing URL must not discard the results of all the others.
                        print("err", target, e)
                # Worker processes do not share ``result``, so the rows come from the futures.
                result = rows
        except KeyboardInterrupt:
            program_exit()
    else:
        parser.print_help()
    if len(result) > 0: outExcel(str(int(time.time())))