Dehashed Module (smicallef#1133)
* Dehashed Module
krishnasism authored Feb 18, 2021
1 parent fc8e0b8 commit e9490fa
Showing 2 changed files with 309 additions and 0 deletions.
229 changes: 229 additions & 0 deletions modules/sfp_dehashed.py
@@ -0,0 +1,229 @@
# -------------------------------------------------------------------------------
# Name: sfp_dehashed
# Purpose: Gather breach data from Dehashed API.
#
# Author: <krishnasis@hotmail.com>
#
# Created: 16-01-2021
# Copyright: (c) Steve Micallef
# Licence: GPL
# -------------------------------------------------------------------------------

import json
import time
import base64
from spiderfoot import SpiderFootEvent, SpiderFootPlugin


class sfp_dehashed(SpiderFootPlugin):

meta = {
'name': "Dehashed",
'summary': "Gather breach data from Dehashed API.",
'flags': ["apikey"],
'useCases': ["Footprint", "Investigate", "Passive"],
'categories': ["Leaks, Dumps and Breaches"],
'dataSource': {
'website': "https://www.dehashed.com/",
'model': "COMMERCIAL_ONLY",
'references': [
"https://www.dehashed.com/docs"
],
'favIcon': "https://www.dehashed.com/assets/img/favicon.ico",
'logo': "https://www.dehashed.com/assets/img/logo.png",
'description': "Have you been compromised? "
"DeHashed provides free deep-web scans and protection against credential leaks. "
"A modern personal asset search engine created for "
"security analysts, journalists, security companies, "
"and everyday people to help secure accounts and provide insight on compromised assets. "
"Free breach alerts & breach notifications.",
}
}

# Default options
opts = {
'api_key_username': '',
'api_key': '',
'per_page': 10000,
'max_pages': 2,
'pause': 1
}

# Option descriptions
optdescs = {
'api_key_username': 'Dehashed username.',
'api_key': 'Dehashed API key.',
        'per_page': 'Maximum number of results per page (max: 10,000).',
        'max_pages': 'Maximum number of pages to fetch (max: 10 pages).',
'pause': 'Number of seconds to wait between each API call.'
}

results = None
errorState = False

def setup(self, sfc, userOpts=dict()):
self.sf = sfc
self.results = self.tempStorage()

for opt in list(userOpts.keys()):
self.opts[opt] = userOpts[opt]

# What events is this module interested in for input
def watchedEvents(self):
return [
"DOMAIN_NAME",
"EMAILADDR"
]

# What events this module produces
def producedEvents(self):
return [
'EMAILADDR',
'EMAILADDR_COMPROMISED',
'PASSWORD_COMPROMISED',
'HASH_COMPROMISED',
'RAW_RIR_DATA'
]

# Query Dehashed
    def query(self, event, per_page, start):
        if event.eventType == "EMAILADDR":
            queryString = f"https://api.dehashed.com/search?query=email:\"{event.data}\"&page={start}&size={per_page}"
        elif event.eventType == "DOMAIN_NAME":
            queryString = f"https://api.dehashed.com/search?query=email:\"@{event.data}\"&page={start}&size={per_page}"
        else:
            return None

        token = base64.b64encode(f"{self.opts['api_key_username']}:{self.opts['api_key']}".encode('utf-8')).decode('utf-8')
headers = {
'Accept': 'application/json',
'Authorization': f'Basic {token}'
}
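        # The header pair above amounts to HTTP Basic auth against the JSON
        # search endpoint; roughly the same request could be made with curl,
        # e.g. (sketch with placeholder credentials):
        #   curl 'https://api.dehashed.com/search?query=email:"alice@example.com"' \
        #        -u 'USERNAME:API_KEY' -H 'Accept: application/json'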

res = self.sf.fetchUrl(queryString,
headers=headers,
timeout=15,
useragent=self.opts['_useragent'],
verify=True)

time.sleep(self.opts['pause'])

if res['code'] == "400":
self.sf.error("Too many requests were performed in a small amount of time. Please wait a bit before querying the API.")
time.sleep(5)
res = self.sf.fetchUrl(queryString, headers=headers, timeout=15, useragent=self.opts['_useragent'], verify=True)

if res['code'] == "401":
self.sf.error("Invalid API credentials")
self.errorState = True
return None

if res['code'] != "200":
self.sf.error("Unable to fetch data from Dehashed.")
self.errorState = True
return None

if res['content'] is None:
self.sf.debug('No response from Dehashed')
return None

try:
return json.loads(res['content'])
except Exception as e:
self.sf.debug(f"Error processing JSON response: {e}")
return None

# Handle events sent to this module
def handleEvent(self, event):
eventName = event.eventType
srcModuleName = event.module
eventData = event.data

if srcModuleName == self.__name__:
return None

if eventData in self.results:
return None

if self.errorState:
return None

self.results[eventData] = True

self.sf.debug(f"Received event, {eventName}, from {srcModuleName}")

if self.opts['api_key'] == "" or self.opts['api_key_username'] == "":
self.sf.error("You enabled sfp_dehashed but did not set an API key/API Key Username!")
self.errorState = True
return

currentPage = 1
maxPages = self.opts['max_pages']
perPage = self.opts['per_page']

while currentPage <= maxPages:
if self.checkForStop():
return None

if self.errorState:
break

data = self.query(event, perPage, currentPage)

if not data:
return None

breachResults = set()
emailResults = set()
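            # Track (email, leak source) pairs and emails already reported so
            # duplicate rows in the same response don't emit duplicate events.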

            for row in data.get('entries') or []:
email = row.get('email')
password = row.get('password')
passwordHash = row.get('hashed_password')
leakSource = row.get('database_name', 'Unknown')

if f"{email} [{leakSource}]" in breachResults:
continue

breachResults.add(f"{email} [{leakSource}]")

if eventName == "EMAILADDR":
if email == eventData:
evt = SpiderFootEvent('EMAILADDR_COMPROMISED', f"{email} [{leakSource}]", self.__name__, event)
self.notifyListeners(evt)

if password:
evt = SpiderFootEvent('PASSWORD_COMPROMISED', f"{email}:{password} [{leakSource}]", self.__name__, event)
self.notifyListeners(evt)

if passwordHash:
evt = SpiderFootEvent('HASH_COMPROMISED', f"{email}:{passwordHash} [{leakSource}]", self.__name__, event)
self.notifyListeners(evt)

evt = SpiderFootEvent('RAW_RIR_DATA', str(row), self.__name__, event)
self.notifyListeners(evt)

if eventName == "DOMAIN_NAME":
pevent = SpiderFootEvent("EMAILADDR", email, self.__name__, event)
if email not in emailResults:
self.notifyListeners(pevent)
emailResults.add(email)

evt = SpiderFootEvent('EMAILADDR_COMPROMISED', f"{email} [{leakSource}]", self.__name__, pevent)
self.notifyListeners(evt)

if password:
evt = SpiderFootEvent('PASSWORD_COMPROMISED', f"{email}:{password} [{leakSource}]", self.__name__, pevent)
self.notifyListeners(evt)

if passwordHash:
evt = SpiderFootEvent('HASH_COMPROMISED', f"{email}:{passwordHash} [{leakSource}]", self.__name__, pevent)
self.notifyListeners(evt)

evt = SpiderFootEvent('RAW_RIR_DATA', str(row), self.__name__, pevent)
self.notifyListeners(evt)

currentPage += 1

            if (data.get('total') or 0) < self.opts['per_page']:
return None
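            # Worked example with the defaults (per_page=10000, max_pages=2): a
            # reported total of 25000 fetches pages 1 and 2 before hitting the
            # page cap, while a total of 4000 stops after the first page since
            # 4000 < 10000.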

# End of sfp_dehashed class
80 changes: 80 additions & 0 deletions test/unit/modules/test_sfp_dehashed.py
@@ -0,0 +1,80 @@
# test_sfp_dehashed.py
import unittest

from modules.sfp_dehashed import sfp_dehashed
from sflib import SpiderFoot
from spiderfoot import SpiderFootEvent, SpiderFootTarget


class TestModuleDehashed(unittest.TestCase):
"""
Test modules.sfp_dehashed
"""

default_options = {
'_debug': False, # Debug
'__logging': True, # Logging in general
'__outputfilter': None, # Event types to filter from modules' output
'_useragent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0', # User-Agent to use for HTTP requests
'_dnsserver': '', # Override the default resolver
'_fetchtimeout': 5, # number of seconds before giving up on a fetch
'_internettlds': 'https://publicsuffix.org/list/effective_tld_names.dat',
'_internettlds_cache': 72,
'_genericusers': "abuse,admin,billing,compliance,devnull,dns,ftp,hostmaster,inoc,ispfeedback,ispsupport,list-request,list,maildaemon,marketing,noc,no-reply,noreply,null,peering,peering-notify,peering-request,phish,phishing,postmaster,privacy,registrar,registry,root,routing-registry,rr,sales,security,spam,support,sysadmin,tech,undisclosed-recipients,unsubscribe,usenet,uucp,webmaster,www",
'__version__': '3.3-DEV',
'__database': 'spiderfoot.test.db', # note: test database file
'__modules__': None, # List of modules. Will be set after start-up.
'_socks1type': '',
'_socks2addr': '',
'_socks3port': '',
'_socks4user': '',
'_socks5pwd': '',
'_torctlport': 9051,
'__logstdout': False
}

def test_opts(self):
module = sfp_dehashed()
self.assertEqual(len(module.opts), len(module.optdescs))

def test_setup(self):
"""
Test setup(self, sfc, userOpts=dict())
"""
sf = SpiderFoot(self.default_options)

module = sfp_dehashed()
module.setup(sf, dict())

def test_watchedEvents_should_return_list(self):
module = sfp_dehashed()
self.assertIsInstance(module.watchedEvents(), list)

def test_producedEvents_should_return_list(self):
module = sfp_dehashed()
self.assertIsInstance(module.producedEvents(), list)

@unittest.skip("todo")
def test_handleEvent(self):
"""
Test handleEvent(self, event)
"""
sf = SpiderFoot(self.default_options)

module = sfp_dehashed()
module.setup(sf, dict())

target_value = 'example target value'
target_type = 'EMAILADDR'
target = SpiderFootTarget(target_value, target_type)
module.setTarget(target)

event_type = 'ROOT'
event_data = 'example data'
event_module = ''
source_event = ''
evt = SpiderFootEvent(event_type, event_data, event_module, source_event)

result = module.handleEvent(evt)

self.assertIsNone(result)
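        # A fuller test could stub the Dehashed API call instead of skipping,
        # e.g. (sketch; assumes the fetchUrl response shape used by the module
        # and hypothetical credentials):
        #
        #   from unittest.mock import patch
        #   module.opts['api_key_username'] = 'user@example.com'
        #   module.opts['api_key'] = 'example-key'
        #   with patch.object(sf, 'fetchUrl', return_value={
        #           'code': '200',
        #           'content': '{"entries": [], "total": 0}'}):
        #       self.assertIsNone(module.handleEvent(evt))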
