被动信息搜集:复现MS08067实验室邮件爬取
在针对目标系统进行渗透的过程中,如果目标服务器安全性很高,通过服务器很难获取目标权限时,通常会采用社工的方式对目标服务进行进一步攻击。邮件钓鱼攻击是常见的攻击方式之一。在进行钓鱼之前,需要针对目标相关人员的邮件信息进行全面采集。现分享MS08067实验室编写的邮件采集工具,此邮件采集工具通过国内常见的搜索引擎(百度、Bing等)进行搜集。
运行环境:
OS: macOS Monterey Version 12.3.1 (英文版)
IDE:PyCharm 2020.1
源码如下:
#_*_coding:utf-8_*_
# 作者 :liuxiaowei
# 创建时间 :4/15/22 8:45 PM
# 文件 :邮件爬取.py
# IDE :PyCharm
# 导入相关模块
import sys
import getopt
import requests
from bs4 import BeautifulSoup
import re
# 定义启动函数
def start(argv):
    """Parse command-line options and launch the email crawler.

    Args:
        argv: command-line arguments, typically ``sys.argv[1:]``.

    Recognized options:
        -u <url>    target domain to scope the searches to
        -p <pages>  number of search-result pages to crawl
        -h          print usage information and exit
    """
    url = ""
    pages = ""
    # No arguments at all: point the user at the help option and quit.
    if len(sys.argv) < 2:
        print("-h 帮助信息;\n")
        sys.exit()
    try:
        banner()
        opts, args = getopt.getopt(argv, "-u:-p:-h")
    except getopt.GetoptError:
        print("Error an argument!")
        sys.exit()
    # opts is a list of (option, value) tuples, e.g. ('-u', url), ('-p', '1').
    for opt, arg in opts:
        if opt == "-u":
            url = arg
        elif opt == "-p":
            pages = arg
        elif opt == "-h":
            # BUG FIX: the original wrapped this in print(usage()), but
            # usage() prints its own text and calls sys.exit(), so the outer
            # print() could never execute. Call usage() directly.
            usage()
    launcher(url, pages)
# banner 信息
def banner():
    """Print the colored MS08067 lab banner to stdout."""
    # ANSI escapes: 1;34 = bold blue, 1;32 = bold green, 0m = reset.
    border = '\033[1;34m#################################################################################\033[0m\n'
    title = '\033[1;34m###############################\033[0m\033[1;32m MS08067 实验室\033[1;34m###################################\033[0m\n'
    print(border + title + border)
# 使用规则
def usage():
    """Print command-line usage information and terminate the process."""
    lines = (
        '\t\t-h: --help 帮助;',
        '\t\t-u: --url 域名;',
        '\t\t-p: --pages 页数;',
        '\t\teg: python -u "www.baidu.com" -p 100\n',
    )
    for line in lines:
        print(line)
    # Help is terminal: never return to the caller.
    sys.exit()
# 漏洞回调函数
def launcher(url, pages):
    """Crawl Bing and Baidu for email addresses related to *url*.

    Args:
        url:   target domain to scope the searches to.
        pages: number of search-result pages to walk (string or int).

    Side effects:
        Prints each newly discovered address and appends it to ``data.txt``.
    """
    # A set gives O(1) de-duplication; the original used a list, making
    # membership tests O(n) per email.
    seen = set()
    # Keywords that commonly appear near email addresses on result pages.
    key_words = ['email', 'mail', 'mailbox', '邮件', '邮箱', 'postbox']
    for page in range(1, int(pages) + 1):
        for key_word in key_words:
            # Query both engines with the same (url, page, keyword) triple.
            bing_emails = bing_search(url, page, key_word)
            baidu_emails = baidu_search(url, page, key_word)
            for email in bing_emails + baidu_emails:
                # Skip addresses we have already reported.
                if email in seen:
                    continue
                seen.add(email)
                print(email)
                # Append immediately so an interrupted run keeps its results.
                with open('data.txt', 'a+') as f:
                    f.write(f'爬取的邮箱信息:{email}' + '\n')
# 定义bing搜索函数,参数为url,page(页面数), 关键词
def bing_search(url, page, key_word):
    """Search cn.bing.com for *key_word* scoped to *url* and return the
    email addresses scraped from the result page.

    Args:
        url:      target domain (used with Bing's site: operator).
        page:     1-based result page number (10 results per page).
        key_word: search keyword.

    Returns:
        list of email address strings found in the response body.
    """
    referer = 'http://cn.bing.com/search?q=email+site%3abaidu.com&qs=n&sp=-1&pq=emailsite%3abaidu.com&first=1&FORM=PERE1'
    session = requests.session()
    # Build the paginated query URL; &first= selects the result offset.
    query_url = (
        'http://cn.bing.com/search?q=' + key_word + '+site%3a' + url
        + '&qs=n&sp=-1&pq=' + key_word + 'site%3a' + url
        + '&first=' + str((page - 1) * 10) + '&FORM=PERE1'
    )
    # Warm-up visit to the home page so the session carries Bing's cookies.
    session.get('http://cn.bing.com', headers=headers(referer))
    response = session.get(query_url, stream=True, headers=headers(referer), timeout=8)
    return search_email(response.text)
# 定义百度搜索函数
def baidu_search(url, page, key_word):
    """Search Baidu for *key_word* scoped to *url*, fetch each result link,
    and return all email addresses scraped from the linked pages.

    Args:
        url:      target domain (used with Baidu's site: operator).
        page:     1-based result page number (10 results per page).
        key_word: search keyword.

    Returns:
        list of email address strings (may contain duplicates across links).
    """
    email_list = []
    referer = 'https://www.baidu.com/s?wd=email+site%3Abaidu.com&pn=1'
    baidu_url = 'https://www.baidu.com/s?wd='+key_word+'site%3A'+url + '&pn='+str((page-1)*10)
    conn = requests.session()
    # Warm-up request so the session carries Baidu's cookies.
    conn.get(referer, headers=headers(referer))
    r = conn.get(baidu_url, headers=headers(referer))
    soup = BeautifulSoup(r.text, 'lxml')
    # Baidu wraps each result title/link in an <h3> element.
    for h3 in soup.find_all('h3'):
        anchor = h3.find('a')
        # BUG FIX: an <h3> without a link made the original crash with an
        # uncaught AttributeError; skip such entries instead.
        if anchor is None:
            continue
        href = anchor.get('href')
        try:
            r = requests.get(href, headers=headers(referer), timeout=8)
            # BUG FIX: collect emails inside the try. The original kept the
            # previous iteration's `emails` value when a request failed and
            # re-appended those stale results for the failing link.
            email_list.extend(search_email(r.text))
        except Exception:
            # Best-effort crawl: unreachable result links are simply skipped.
            pass
    return email_list
# 搜索邮箱函数,参数为html
def search_email(html):
    """Extract every email-address-looking substring from *html*.

    Matching is case-insensitive; the local and domain parts accept
    letters, digits, dots, hyphens, plus signs, and underscores.
    """
    pattern = re.compile(r'[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+', re.I)
    return pattern.findall(html)
# 定义一个头函数,参数referer
def headers(referer):
    """Build browser-imitating HTTP request headers.

    Args:
        referer: value for the Referer header, so requests look like they
                 originate from a normal search-engine page.

    Returns:
        dict of header name -> value suitable for ``requests``.
    """
    # Desktop Chrome on macOS user-agent string.
    user_agent = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/100.0.4896.88 Safari/537.36'
    )
    return {
        "user-agent": user_agent,
        "Accept": "*/*",
        "Accept-Language": "en-US, en;q=0.5",
        "Accept-Encoding": "gzip, deflate",
        "Referer": referer,
    }
# 主程序
# Script entry point: pass the CLI arguments (minus the program name) to start().
if __name__ == "__main__":
    try:
        start(sys.argv[1:])
    except KeyboardInterrupt:
        # Ctrl-C during a crawl: report and exit normally.
        print('interrupted by user, killing all threads.....')
运行结果如下图: