Skip to content

Commit

Permalink
added documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
JungeWerther committed Apr 5, 2024
1 parent bb70023 commit cb9057f
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 10 deletions.
2 changes: 2 additions & 0 deletions errors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Error classes and decorators for the APIConnector module."""

# custom error class for all api-connector related errors
class APIConnectorError(BaseException):
"""Base class for APIConnector errors."""
Expand Down
10 changes: 6 additions & 4 deletions helpers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Helper functions for the Connect module."""

from itertools import product

def flatten_dict(**d):
    """Expand list-valued keyword arguments into one dict per combination.

    Each keyword whose value is a ``list`` contributes every one of its
    elements in turn; any other value is treated as a single fixed choice.
    One dict is yielded per element of the Cartesian product of those
    choices, with keys in the order they were passed.

    Called with no keyword arguments, a single empty dict is yielded (the
    empty product) instead of raising while unpacking the items.
    A keyword whose value is an empty list results in nothing being yielded.
    """
    keys = list(d)
    # Wrap non-list values so every entry is a list of alternatives.
    choices = [value if isinstance(value, list) else [value] for value in d.values()]
    for combination in product(*choices):
        yield dict(zip(keys, combination))
10 changes: 7 additions & 3 deletions parsing.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
"""Parsing methods for different data types"""

import xmltodict

def parse_xml(xml: str) -> dict:
    """Parse an XML string into a dict using ``xmltodict``.

    Parsing is best-effort: if ``xmltodict.parse`` raises, a warning is
    printed and an empty dict is returned so callers always receive a dict.
    """
    try:
        return xmltodict.parse(xml)
    except Exception:
        # Previously a bare ``except`` fell through to ``return obj`` with
        # ``obj`` unbound, turning every parse failure into an
        # UnboundLocalError. ``Exception`` keeps the warn-and-continue intent
        # without swallowing KeyboardInterrupt / SystemExit.
        print("Warning: Could not parse xml. Check returned xml string in response cache.")
        return {}

def parse_html(html: str) -> str:
    """Pass-through parser: hand the given html back unchanged."""
    unchanged = html
    return unchanged
30 changes: 28 additions & 2 deletions regex.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
"""Regex helper functions."""

import re

def get_tokens(s):
    """Collect every [bracketed] token in a string, brackets included, in order."""
    bracketed = re.compile(r'\[[^\]]+\]')
    return [hit.group(0) for hit in bracketed.finditer(s)]

def get_text_between_tokens(s):
    """Map each [token] in a string to the text that follows it.

    The string is split on square-bracket tokens. Each token name (without
    brackets) becomes a key whose value is the stripped text between that
    token and the next one (or the end of the string). A token followed by
    no text, or only whitespace, maps to ``None``. Text before the first
    token belongs to no token and is ignored. If a token name repeats, the
    last occurrence wins.
    """
    # Splitting on a capturing group keeps tokens and texts aligned:
    # [leading, token1, text1, token2, text2, ...].
    parts = re.split(r'\[([^\]]+)\]', s)
    names = parts[1::2]
    texts = (part.strip() for part in parts[2::2])
    # Empty segments become None here instead of being dropped, which used to
    # shift later texts onto the wrong token.
    return {name: (text or None) for name, text in zip(names, texts)}

def parse_error_log(log):
"""Parse an error log file, find messages encapsulated in [MESSAGE] tags."""
with open(log, 'r') as f:
content = f.read()

Expand All @@ -19,77 +24,98 @@ def parse_error_log(log):
return [{'message': match[0], 'content': match[1]} for match in matches]

def capture_between_start_and_end(s):
    """Return the text enclosed by the first [START] ... [END] pair, or None if absent."""
    delimited = re.compile(r'\[START\](.*?)\[END\]', re.DOTALL)
    found = delimited.search(s)
    if found is None:
        return None
    return found.group(1)

def get_yaml_blocks(file_content, language):
    """Return the bodies of all fenced code blocks tagged with the given language."""
    fence = r'```' + re.escape(language) + '(.*?)```'
    return re.findall(fence, file_content, flags=re.DOTALL)

def find_nth_bracketed_message(s, n: int):
    """Find the nth message encapsulated in square brackets in a string.

    ``n`` is 1-based: ``n=1`` returns the first ``[...]`` occurrence, brackets
    included. ``n=0`` is accepted as an alias for the first occurrence, which
    is what the earlier implementation returned for it. Returns ``None`` when
    the string holds fewer than ``n`` bracketed messages or ``n`` is negative.
    """
    if n < 0:
        return None
    # Collect every occurrence; a single re.search only ever sees the first
    # one, so any n > 1 could never be satisfied.
    messages = re.findall(r'\[[^\]]+\]', s)
    position = max(n, 1) - 1
    if position >= len(messages):
        return None
    return messages[position]

def trim_assistant(s: str) -> str:
    """Return everything after the first <|assistant|> tag, or None if the tag is missing."""
    after_tag = re.compile(r'<\|assistant\|>(.*)', re.DOTALL)
    found = after_tag.search(s)
    if found is None:
        return None
    return found.group(1)

def get_type(s: str) -> str:
    """Return the word following the first type= tag, or None if there is none."""
    type_tag = re.compile(r'type=(\w+)')
    found = type_tag.search(s)
    if found is None:
        return None
    return found.group(1)

def get_file_extension(s: str) -> str:
    """Return the trailing extension of a filename (without the dot), or None."""
    trailing_extension = re.compile(r'\.(\w+)$')
    found = trailing_extension.search(s)
    if found is None:
        return None
    return found.group(1)

def is_url(s: str) -> bool:
    """Loosely check for a URL: true when http:// or https:// appears anywhere in the string."""
    scheme = re.compile(r'https?://')
    return bool(scheme.search(s))

def is_file(s: str) -> bool:
    """Loosely check for a file name: true when the string ends in a dot plus word characters."""
    extension = re.compile(r'\.\w+$')
    return bool(extension.search(s))

def strip_url(s: str) -> str:
    """Drop every http:// or https:// occurrence from a string."""
    scheme = re.compile(r'https?://')
    return scheme.sub('', s)

def strip_file(s: str) -> str:
    """Drop the trailing file extension (dot included) from a filename."""
    extension = re.compile(r'\.\w+$')
    return extension.sub('', s)

def replace_close_open(s: str) -> str:
    """Swap every _close in a string for _open."""
    close_marker = re.compile(r'_close')
    return close_marker.sub('_open', s)

def remove_open(s: str) -> str:
    """Delete every _open occurrence from a string."""
    open_marker = re.compile(r'_open')
    return open_marker.sub('', s)

def return_close_bool(s: str) -> bool:
    """Tell whether _close occurs anywhere in a string."""
    close_marker = re.compile(r'_close')
    return bool(close_marker.search(s))

def contains_v1_or_api(s: str) -> bool:
    """Tell whether a string contains the path segment /v1/ or /api/."""
    versioned_segment = re.compile(r'/v1/|/api/')
    return versioned_segment.search(s) is not None

def have_same_base_url(url1, url2):
    """Check if two URLs have the same base URL.

    The base URL is the scheme (http or https) plus everything up to the
    first slash after it. The comparison is an exact string match, so
    differing schemes or ports count as different bases.

    Returns ``False`` when either argument does not start with ``http://``
    or ``https://``, rather than raising on the failed match.
    """
    base_url_regex = re.compile(r'https?://[^/]*')
    first = base_url_regex.match(url1)
    second = base_url_regex.match(url2)
    # match() yields None for anything that is not an http(s) URL; such
    # inputs have no base URL to compare.
    if first is None or second is None:
        return False
    return first.group() == second.group()

def return_escapable_variables(s: str) -> list:
    """List the contents of every {curly-brace} placeholder in a string, in order."""
    placeholder = re.compile(r'\{(.+?)\}')
    return [hit.group(1) for hit in placeholder.finditer(s)]

def list_to_file_path(l: list) -> str:
    """Build a .json file name: join the strings with underscores, then turn every slash into an underscore."""
    joined = '_'.join(l)
    file_name = joined + '.json'
    return re.sub(r'/', '_', file_name)

def simple_tokenize_words(s: str) -> list:
    """Naively split a string into its runs of word characters."""
    word = re.compile(r'\w+')
    return [hit.group(0) for hit in word.finditer(s)]

def simple_tokenize_words_and_punctuation(s: str) -> list:
    """Return the words and punctuation in a string. Naive.

    A token is either a run of word characters or one single character that
    is neither a word character nor whitespace. Whitespace is discarded and
    tokens are returned in order of appearance.
    """
    # The second alternative is what picks up punctuation; a words-only
    # pattern such as \b\w+\b silently drops it.
    return re.findall(r'\w+|[^\w\s]', s)

def censor(v: str, c: str):
    """Censor a string by replacing every occurrence of a substring with <hidden>.

    ``c`` is matched literally (no regex semantics). If ``c`` is empty there
    is nothing to hide and ``v`` is returned unchanged.
    """
    if not c:
        # An empty pattern matches between every character and would scatter
        # "<hidden>" throughout the string.
        return v
    # Literal replacement; equivalent to substituting an escaped regex.
    return v.replace(c, "<hidden>")

def remove_date_literal(s: str) -> str:
    """Delete every occurrence of the literal text date- from a string."""
    date_literal = re.compile(r'date-')
    return date_literal.sub('', s)
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
supabase
supabase
xmltodict

0 comments on commit cb9057f

Please sign in to comment.