Skip to content

Commit

Permalink
sflib: Remove myPath() and move wordlist functions to SpiderFootHelpers
Browse files Browse the repository at this point in the history
  • Loading branch information
bcoles authored May 29, 2022
1 parent 2bdc11f commit 920651c
Show file tree
Hide file tree
Showing 12 changed files with 175 additions and 99 deletions.
6 changes: 3 additions & 3 deletions modules/sfp_accounts.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from queue import Empty as QueueEmpty
from queue import Queue

from spiderfoot import SpiderFootEvent, SpiderFootPlugin
from spiderfoot import SpiderFootEvent, SpiderFootHelpers, SpiderFootPlugin


class sfp_accounts(SpiderFootPlugin):
Expand Down Expand Up @@ -73,8 +73,8 @@ def setup(self, sfc, userOpts=dict()):
for opt in list(userOpts.keys()):
self.opts[opt] = userOpts[opt]

self.commonNames = set(self.sf.dictnames())
self.words = set(self.sf.dictwords())
self.commonNames = SpiderFootHelpers.humanNamesFromWordlists()
self.words = SpiderFootHelpers.dictionaryWordsFromWordlists()

content = self.sf.cacheGet("sfaccounts", 48)
if content is None:
Expand Down
4 changes: 2 additions & 2 deletions modules/sfp_binstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import string

from spiderfoot import SpiderFootEvent, SpiderFootPlugin
from spiderfoot import SpiderFootEvent, SpiderFootHelpers, SpiderFootPlugin


class sfp_binstring(SpiderFootPlugin):
Expand Down Expand Up @@ -57,7 +57,7 @@ def setup(self, sfc, userOpts=dict()):
self.results = list()
self.__dataSource__ = "Target Website"

self.d = set(self.sf.dictwords())
self.d = SpiderFootHelpers.dictionaryWordsFromWordlists()

for opt in list(userOpts.keys()):
self.opts[opt] = userOpts[opt]
Expand Down
9 changes: 3 additions & 6 deletions modules/sfp_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import re

from spiderfoot import SpiderFootEvent, SpiderFootPlugin
from spiderfoot import SpiderFootEvent, SpiderFootHelpers, SpiderFootPlugin


class sfp_names(SpiderFootPlugin):
Expand Down Expand Up @@ -46,22 +46,19 @@ class sfp_names(SpiderFootPlugin):
def setup(self, sfc, userOpts=dict()):
    """Initialise the module with a SpiderFoot instance and user options.

    Args:
        sfc: SpiderFoot instance
        userOpts (dict): user-supplied module option overrides
    """
    self.sf = sfc
    self.results = self.tempStorage()
    # Wordlists used to tell dictionary words apart from human names.
    # (Loaded via SpiderFootHelpers; the old self.sf.dictwords()/dictnames()
    # accessors were removed.)
    self.d = SpiderFootHelpers.dictionaryWordsFromWordlists()
    self.n = SpiderFootHelpers.humanNamesFromWordlists()

    for opt in list(userOpts.keys()):
        self.opts[opt] = userOpts[opt]

# What events is this module interested in for input
# * = be notified about all events.
def watchedEvents(self):
    """Event types this module accepts as input.

    Returns:
        list: watched event type names
    """
    return [
        "TARGET_WEB_CONTENT",
        "EMAILADDR",
        "DOMAIN_WHOIS",
        "NETBLOCK_WHOIS",
        "RAW_RIR_DATA",
        "RAW_FILE_META_DATA",
    ]

# What events this module produces
# This is to support the end user in selecting modules based on events
# produced.
def producedEvents(self):
    """Event types this module emits.

    Returns:
        list: produced event type names
    """
    return ["HUMAN_NAME"]

Expand Down
11 changes: 4 additions & 7 deletions sf.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def main() -> None:
'_fetchtimeout': 5, # number of seconds before giving up on a fetch
'_internettlds': 'https://publicsuffix.org/list/effective_tld_names.dat',
'_internettlds_cache': 72,
'_genericusers': "abuse,admin,billing,compliance,devnull,dns,ftp,hostmaster,inoc,ispfeedback,ispsupport,list-request,list,maildaemon,marketing,noc,no-reply,noreply,null,peering,peering-notify,peering-request,phish,phishing,postmaster,privacy,registrar,registry,root,routing-registry,rr,sales,security,spam,support,sysadmin,tech,undisclosed-recipients,unsubscribe,usenet,uucp,webmaster,www",
'_genericusers': ",".join(SpiderFootHelpers.usernamesFromWordlists(['generic-usernames'])),
'__database': f"{SpiderFootHelpers.dataPath()}/spiderfoot.db",
'__modules__': None, # List of modules. Will be set after start-up.
'__correlationrules__': None, # List of correlation rules. Will be set after start-up.
Expand Down Expand Up @@ -134,14 +134,13 @@ def main() -> None:
logListenerSetup(loggingQueue, sfConfig)
logWorkerSetup(loggingQueue)
log = logging.getLogger(f"spiderfoot.{__name__}")
sft = SpiderFoot(sfConfig)

# Add descriptions of the global config options
sfConfig['__globaloptdescs__'] = sfOptdescs

# Load each module in the modules directory with a .py extension
try:
mod_dir = sft.myPath() + '/modules/'
mod_dir = os.path.dirname(os.path.abspath(__file__)) + '/modules/'
sfModules = SpiderFootHelpers.loadModulesAsDict(mod_dir, ['sfp_template.py'])
except BaseException as e:
log.critical(f"Failed to load modules: {e}", exc_info=True)
Expand All @@ -154,7 +153,7 @@ def main() -> None:
# Load each correlation rule in the correlations directory with
# a .yaml extension
try:
correlations_dir = sft.myPath() + '/correlations/'
correlations_dir = os.path.dirname(os.path.abspath(__file__)) + '/correlations/'
correlationRulesRaw = SpiderFootHelpers.loadCorrelationRulesRaw(correlations_dir, ['template.yaml'])
except BaseException as e:
log.critical(f"Failed to load correlation rules: {e}", exc_info=True)
Expand Down Expand Up @@ -475,8 +474,6 @@ def start_web_server(sfWebUiConfig: dict, sfConfig: dict, loggingQueue=None) ->

log.info(f"Starting web server at {web_host}:{web_port} ...")

sf = SpiderFoot(sfConfig)

# Enable access to static files via the web directory
conf = {
'/query': {
Expand All @@ -486,7 +483,7 @@ def start_web_server(sfWebUiConfig: dict, sfConfig: dict, loggingQueue=None) ->
'/static': {
'tools.staticdir.on': True,
'tools.staticdir.dir': 'static',
'tools.staticdir.root': f"{sf.myPath()}/spiderfoot"
'tools.staticdir.root': f"{os.path.dirname(os.path.abspath(__file__))}/spiderfoot"
}
}

Expand Down
55 changes: 0 additions & 55 deletions sflib.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,19 +238,6 @@ def debug(self, message: str) -> None:

self.log.debug(f"{message}", extra={'scanId': self._scanId})

@staticmethod
def myPath() -> str:
"""This will get us the program's directory, even if we are frozen using py2exe.
Returns:
str: Program root directory
"""
# Determine whether we've been compiled by py2exe
if hasattr(sys, "frozen"):
return os.path.dirname(sys.executable)

return os.path.dirname(__file__)

def hashstring(self, string: str) -> str:
"""Returns a SHA256 hash of the specified input.
Expand Down Expand Up @@ -838,48 +825,6 @@ def normalizeDNS(self, res: list) -> list:
ret.append(host)
return ret

def dictwords(self) -> set:
    """Return dictionary words from several language dictionaries.

    Returns:
        set: words from dictionaries
    """
    words = set()

    for lang in ("english", "german", "french", "spanish"):
        path = f"{self.myPath()}/spiderfoot/dicts/ispell/{lang}.dict"
        try:
            with io.open(path, 'r', encoding='utf8', errors='ignore') as fh:
                for line in fh.readlines():
                    # ispell entries may carry /affix flags; keep the bare word.
                    words.add(line.strip().lower().split('/')[0])
        except BaseException as e:
            self.debug(f"Could not read dictionary: {e}")
            continue

    return words

def dictnames(self) -> set:
    """Return a set of human names from the bundled names wordlist.

    Returns:
        set: human names
    """
    words = set()

    dicts = ["names"]

    for d in dicts:
        try:
            # Read as UTF-8 explicitly (consistent with dictwords());
            # relying on the platform default encoding can raise a decode
            # error that is swallowed below, silently dropping the list.
            with open(f"{self.myPath()}/spiderfoot/dicts/ispell/{d}.dict", 'r', encoding='utf8', errors='ignore') as dict_file:
                for w in dict_file.readlines():
                    # ispell entries may carry /affix flags; keep the bare word.
                    words.add(w.strip().lower().split('/')[0])
        except BaseException as e:
            self.debug(f"Could not read dictionary: {e}")
            continue

    return words

def resolveHost(self, host: str) -> list:
"""Return a normalised IPv4 resolution of a hostname.
Expand Down
43 changes: 43 additions & 0 deletions spiderfoot/dicts/generic-usernames.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
abuse
admin
billing
compliance
devnull
dns
ftp
hostmaster
inoc
ispfeedback
ispsupport
list-request
list
maildaemon
marketing
noc
no-reply
noreply
null
peering
peering-notify
peering-request
phish
phishing
postmaster
privacy
registrar
registry
root
routing-registry
rr
sales
security
spam
support
sysadmin
tech
undisclosed-recipients
unsubscribe
usenet
uucp
webmaster
www
85 changes: 85 additions & 0 deletions spiderfoot/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import urllib.parse
import uuid
from pathlib import Path
from importlib import resources

import networkx as nx
from bs4 import BeautifulSoup, SoupStrainer
Expand Down Expand Up @@ -290,6 +291,90 @@ def urlBaseUrl(url: str) -> str:

return bits.group(1).lower()

@staticmethod
def dictionaryWordsFromWordlists(wordlists: list = None) -> set:
"""Return dictionary words from several language dictionaries.
Args:
wordlists (list): list of wordlist file names to read (excluding file extension).
Returns:
set: words from dictionaries
Raises:
IOError: Error reading wordlist file
"""
words = set()

if wordlists is None:
wordlists = ["english", "german", "french", "spanish"]

for d in wordlists:
try:
with resources.open_text('spiderfoot.dicts.ispell', f"{d}.dict", errors='ignore') as dict_file:
for w in dict_file.readlines():
words.add(w.strip().lower().split('/')[0])
except BaseException as e:
raise IOError(f"Could not read wordlist file '{d}.dict'") from e

return words

@staticmethod
def humanNamesFromWordlists(wordlists: list = None) -> set:
"""Return list of human names from wordlist file.
Args:
wordlists (list): list of wordlist file names to read (excluding file extension).
Returns:
set: human names from wordlists
Raises:
IOError: Error reading wordlist file
"""
words = set()

if wordlists is None:
wordlists = ["names"]

for d in wordlists:
try:
with resources.open_text('spiderfoot.dicts.ispell', f"{d}.dict", errors='ignore') as dict_file:
for w in dict_file.readlines():
words.add(w.strip().lower().split('/')[0])
except BaseException as e:
raise IOError(f"Could not read wordlist file '{d}.dict'") from e

return words

@staticmethod
def usernamesFromWordlists(wordlists: list = None) -> set:
"""Return list of usernames from wordlist file.
Args:
wordlists (list): list of wordlist file names to read (excluding file extension).
Returns:
set: usernames from wordlists
Raises:
IOError: Error reading wordlist file
"""
words = set()

if wordlists is None:
wordlists = ["generic-usernames"]

for d in wordlists:
try:
with resources.open_text('spiderfoot.dicts', f"{d}.txt", errors='ignore') as dict_file:
for w in dict_file.readlines():
words.add(w.strip().lower().split('/')[0])
except BaseException as e:
raise IOError(f"Could not read wordlist file '{d}.txt'") from e

return words

@staticmethod
def buildGraphGexf(root: str, title: str, data: list, flt: list = None) -> str:
"""Convert supplied raw data into GEXF (Graph Exchange XML Format) format (e.g. for Gephi).
Expand Down
2 changes: 1 addition & 1 deletion test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def default_options(request):
'_fetchtimeout': 5, # number of seconds before giving up on a fetch
'_internettlds': 'https://publicsuffix.org/list/effective_tld_names.dat',
'_internettlds_cache': 72,
'_genericusers': "abuse,admin,billing,compliance,devnull,dns,ftp,hostmaster,inoc,ispfeedback,ispsupport,list-request,list,maildaemon,marketing,noc,no-reply,noreply,null,peering,peering-notify,peering-request,phish,phishing,postmaster,privacy,registrar,registry,root,routing-registry,rr,sales,security,spam,support,sysadmin,tech,undisclosed-recipients,unsubscribe,usenet,uucp,webmaster,www",
'_genericusers': ",".join(SpiderFootHelpers.usernamesFromWordlists(['generic-usernames'])),
'__database': f"{SpiderFootHelpers.dataPath()}/spiderfoot.test.db", # note: test database file
'__modules__': None, # List of modules. Will be set after start-up.
'__correlationrules__': None, # List of correlation rules. Will be set after start-up.
Expand Down
10 changes: 5 additions & 5 deletions test/integration/test_sfwebui.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# test_sfwebui.py
import os
import unittest

import cherrypy
from cherrypy.test import helper

from spiderfoot import SpiderFootHelpers
from sflib import SpiderFoot
from sfwebui import SpiderFootWebUi


Expand All @@ -21,9 +21,10 @@ def setup_server():
'_fetchtimeout': 5, # number of seconds before giving up on a fetch
'_internettlds': 'https://publicsuffix.org/list/effective_tld_names.dat',
'_internettlds_cache': 72,
'_genericusers': "abuse,admin,billing,compliance,devnull,dns,ftp,hostmaster,inoc,ispfeedback,ispsupport,list-request,list,maildaemon,marketing,noc,no-reply,noreply,null,peering,peering-notify,peering-request,phish,phishing,postmaster,privacy,registrar,registry,root,routing-registry,rr,sales,security,spam,support,sysadmin,tech,undisclosed-recipients,unsubscribe,usenet,uucp,webmaster,www",
'_genericusers': ",".join(SpiderFootHelpers.usernamesFromWordlists(['generic-usernames'])),
'__database': f"{SpiderFootHelpers.dataPath()}/spiderfoot.test.db", # note: test database file
'__modules__': None, # List of modules. Will be set after start-up.
'__correlationrules__': None, # List of correlation rules. Will be set after start-up.
'_socks1type': '',
'_socks2addr': '',
'_socks3port': '',
Expand All @@ -36,8 +37,7 @@ def setup_server():
'root': '/'
}

sf = SpiderFoot(default_config)
mod_dir = sf.myPath() + '/modules/'
mod_dir = os.path.dirname(os.path.abspath(__file__)) + '/../../modules/'
default_config['__modules__'] = SpiderFootHelpers.loadModulesAsDict(mod_dir, ['sfp_template.py'])

conf = {
Expand All @@ -48,7 +48,7 @@ def setup_server():
'/static': {
'tools.staticdir.on': True,
'tools.staticdir.dir': 'static',
'tools.staticdir.root': f"{sf.myPath()}/spiderfoot",
'tools.staticdir.root': f"{os.path.dirname(os.path.abspath(__file__))}/../../spiderfoot",
}
}

Expand Down
Loading

0 comments on commit 920651c

Please sign in to comment.