forked from smicallef/spiderfoot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sfp_ahmia.py
111 lines (90 loc) · 4.15 KB
/
sfp_ahmia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# -*- coding: utf-8 -*-
# -------------------------------------------------------------------------------
# Name: sfp_ahmia
# Purpose: Searches the Tor search engine 'Ahmia' for content related
# to the domain in question.
#
# Author: Steve Micallef <steve@binarypool.com>
#
# Created: 20/06/2017
# Copyright: (c) Steve Micallef 2017
# Licence: GPL
# -------------------------------------------------------------------------------
from sflib import SpiderFoot, SpiderFootPlugin, SpiderFootEvent
import re
class sfp_ahmia(SpiderFootPlugin):
    """Ahmia:Footprint,Investigate:Search Engines::Search Tor 'Ahmia' search engine for mentions of the target domain."""
    # NOTE(review): the docstring above looks like colon-delimited module
    # metadata (name:use cases:category::description), presumably consumed by
    # the framework at runtime -- confirm before rewording it.

    # Default options
    opts = {
        # We don't bother with pagination as ahmia seems fairly limited in coverage
        'fetchlinks': False
    }

    # Option descriptions
    optdescs = {
        'fetchlinks': "Fetch the darknet pages (via TOR, if enabled) to verify they mention your target."
    }

    # Search terms and links that have already been processed; consulted by
    # handleEvent() to avoid repeating searches and re-reporting links.
    results = list()

    def setup(self, sfc, userOpts=None):
        """Store the SpiderFoot handle, reset state and apply user options.

        sfc      -- object kept as self.sf and used for fetching and logging.
        userOpts -- optional mapping of option name to value; every entry
                    overrides/extends self.opts. Defaults to no overrides.
        """
        self.sf = sfc
        self.results = list()

        # A None default replaces the former mutable `dict()` default, which
        # would have been shared between calls.
        if userOpts:
            self.opts.update(userOpts)

    # What events is this module interested in for input
    def watchedEvents(self):
        """Return the list of event types this module consumes."""
        return ["DOMAIN_NAME"]

    # What events this module produces
    # This is to support the end user in selecting modules based on events
    # produced.
    def producedEvents(self):
        """Return the list of event types this module may emit."""
        return ["DARKNET_MENTION_URL", "SEARCH_ENGINE_WEB_CONTENT"]

    def handleEvent(self, event):
        """Search ahmia.fi for event.data and report the .onion links found.

        Emits one SEARCH_ENGINE_WEB_CONTENT event carrying the raw search
        page, then one DARKNET_MENTION_URL event per new .onion link. When the
        'fetchlinks' option is set, a link is only reported if the fetched
        page contains the search term. Always returns None.
        """
        eventData = event.data

        if eventData in self.results:
            self.sf.debug("Already did a search for " + eventData + ", skipping.")
            return None
        self.results.append(eventData)

        # Query the search engine for the target
        data = self.sf.fetchUrl("https://ahmia.fi/search/?q=" + eventData,
                                useragent=self.opts['_useragent'],
                                timeout=self.opts['_fetchtimeout'])
        if data is None or not data.get('content'):
            self.sf.info("No results returned from ahmia.fi.")
            return None

        content = data['content']

        # The result markup carries an "onion_key" id; without it there is
        # nothing to extract from the page.
        if "onion_key" not in content:
            return None

        # Check if we've been asked to stop
        if self.checkForStop():
            return None

        # Submit the Ahmia results page for analysis
        evt = SpiderFootEvent("SEARCH_ENGINE_WEB_CONTENT", content,
                              self.__name__, event)
        self.notifyListeners(evt)

        # Raw string: same pattern as before, minus the invalid "\<" / "\>"
        # string escapes that newer Python versions warn about.
        links = re.findall(r'<li\s+class="result"\s+href="(.*?)"\s+id="onion_key">',
                           content, re.IGNORECASE | re.DOTALL)

        for link in links:
            if link in self.results:
                continue
            self.results.append(link)
            self.sf.debug("Found a darknet mention: " + link)

            # Only hidden-service hosts are reported
            if not self.sf.urlFQDN(link).endswith(".onion"):
                continue

            if self.checkForStop():
                return None

            if self.opts['fetchlinks']:
                res = self.sf.fetchUrl(link, timeout=self.opts['_fetchtimeout'],
                                       useragent=self.opts['_useragent'])

                # Guard against a failed fetch (None result or no content)
                # instead of raising on res['content'].
                if res is None or not res.get('content'):
                    self.sf.debug("Ignoring " + link + " as no data returned")
                    continue

                if eventData not in res['content']:
                    self.sf.debug("Ignoring " + link + " as no mention of " + eventData)
                    continue

            evt = SpiderFootEvent("DARKNET_MENTION_URL", link, self.__name__, event)
            self.notifyListeners(evt)

        return None

# End of sfp_ahmia class