Skip to content

Commit

Permalink
dataset v3 try to do inference
Browse files Browse the repository at this point in the history
  • Loading branch information
TristanBilot committed Jun 7, 2023
1 parent 97a01c4 commit 14ef5f8
Show file tree
Hide file tree
Showing 12 changed files with 270 additions and 25 deletions.
1 change: 1 addition & 0 deletions crawler/input.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://stackoverflow.com https://youtube.com http://br-icloud.com.br http://mp3raid.com/music/krizz_kaliko.html http://www.garage-pirenne.be/index.php?option=com_content&view=article&id=70&vsig70_0=15 http://www.pashminaonline.com/pure-pashminas http://google.com http://facebook.com http://twitter.com
47 changes: 47 additions & 0 deletions data/predict/raw/dataset.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
url,depth,is_phishing,status_code,redirects,is_https,is_ip_address,is_error_page,url_length,domain_url_depth,domain_url_length,has_sub_domain,has_at_symbol,dashes_count,path_starts_with_url,is_valid_html,anchors_count,forms_count,javascript_count,self_anchors_count,has_form_with_url,has_iframe,use_mouseover,is_cert_valid,has_dns_record,has_whois,cert_country,cert_reliability,domain_age,domain_end_period,domain_creation_date,refs
http://google.com/,0,false,200,1,false,false,false,18,1,10,false,false,0,false,true,29,1,0,0,true,false,false,,,,,,,,,"[{""url"":""https://www.google.fr/webhp?tab=ww"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://www.google.fr/imghp?hl=fr&tab=wi"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://maps.google.fr/maps?hl=fr&tab=wl"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://play.google.com/?hl=fr&tab=w8"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/?tab=w1"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://news.google.com/?tab=wn"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://mail.google.com/mail/?tab=wm"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://drive.google.com/?tab=wo"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.google.fr/intl/fr/about/products?tab=wh"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://calendar.google.com/calendar?tab=wc"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://translate.google.fr/?hl=fr&tab=wT"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://books.google.fr/?hl=fr&tab=wp"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.google.fr/shopping?hl=fr&source=og&tab=wf"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://www.blogger.com/?tab=wj"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.google.com/finance?tab=we"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://photos.google.com/?tab=wq&pageId=none"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://docs.google.com/document/?usp=docs_alc"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.google.fr/intl/fr/about/products?tab=wh"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://accounts.google.com/ServiceLogin?hl=fr&passive=true&continue=http://www.google.com/&ec=GAZAAQ"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://www.google.fr/preferences?hl=fr"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/preferences?hl=fr"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://www.google.fr/history/optout?hl=fr"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/search"",""is_same_domain"":true,""is_anchor"":false,""is_form"":true,""is_iframe"":false},{""url"":""http://google.com/advanced_search?hl=fr&authuser=0"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/intl/fr/ads/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/services/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/intl/fr/about.html"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://www.google.com/setprefdomain?prefdom=FR&prev=http://www.google.fr/&sig=6480af66K_LdnPi_AYMkuiEk7XuA_P7kSeYx4%3D"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/intl/fr/policies/privacy/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/intl/fr/policies/terms/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false}]"
http://facebook.com/,0,false,200,3,false,false,false,20,1,12,false,false,0,false,true,3,0,0,0,false,false,true,,,,,,,,,"[{""url"":""https://l.facebook.com/l.php?u=https%3A%2F%2Fwww.google.com%2Fchrome%2Fbrowser%2F&h=AT3T6nJvXHDrlACT1f0OrzwgAULBGZthdbArV4nNofsvVd-hBebct8zU9_qlXv1qDuQtsqekQTCmu0Prc3y_4ILgwKoQ2m7s8rhr5HgGZc0ymgMwfSCS6h6KgKf61p1oTSMUKwWSdW_MGWIC"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://l.facebook.com/l.php?u=https%3A%2F%2Fwww.mozilla.org%2Ffirefox%2Fnew%2F%3Futm_source%3Dfacebook%26utm_medium%3Dreferral%26utm_campaign%3Dunsupported-browser-notification&h=AT0tVHDvvfnBmci8zDFRsQhyrcA6C7QPlpYNZwFyoH5SKC-86VQ1SoqIzMkXo_l2c-WXFMxqR9b9uVeW07IUgLlSOuVQbV6b769OQK-15UjYWYral3uussh7AKRZXVm8-uQqt2wPkLMYC2HD"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://facebook.com/mobile"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false}]"
https://youtube.com/,0,false,200,1,true,false,false,20,1,11,false,false,0,false,true,15,0,0,0,false,true,true,,,,,,,,,"[{""url"":""https://accounts.google.com/ServiceLogin?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Dfr%26next%3D%252Fsignin_passive%26feature%3Dpassive&hl=fr"",""is_same_domain"":false,""is_anchor"":false,""is_form"":false,""is_iframe"":true},{""url"":""https://youtube.com/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://youtube.com/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/about/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/about/press/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/about/copyright/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://youtube.com/t/contact_us/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/creators/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/ads/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://developers.google.com/youtube"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://support.google.com/youtube/contact/FR_Complaints"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://youtube.com/t/terms"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://youtube.com/t/privacy"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/about/policies/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/howyoutubeworks?utm_campaign=ytgen&utm_source=ythp&utm_medium=LeftNav&utm_content=txt&u=https%3A%2F%2Fwww.youtube.com%2Fhowyoutubeworks%3Futm_source%3Dythp%26utm_medium%3DLeftNav%26utm_campaign%3Dytgen"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://youtube.com/new"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false}]"
https://stackoverflow.com/,0,false,403,0,true,false,true,26,1,17,false,false,0,false,true,1,0,0,0,false,false,false,,,,,,,,,[]
http://mp3raid.com/music/krizz_kaliko.html,0,false,403,0,false,false,true,42,1,11,false,false,0,true,false,0,0,0,0,false,false,false,,,,,,,,,[]
http://google.com/search,1,,200,2,false,false,false,24,1,10,false,false,0,true,true,29,1,0,0,true,false,false,,,,,,,,,[]
http://google.com/intl/fr/policies/privacy/,1,,200,1,false,false,false,43,1,10,false,false,0,false,true,1,0,0,0,false,false,false,,,,,,,,,[]
https://books.google.fr/?hl=fr&tab=wp,1,,200,0,true,false,false,37,2,15,true,false,0,false,false,26,1,0,0,true,false,true,,,,,,,,,[]
https://photos.google.com/?tab=wq&pageId=none,1,,200,1,true,false,false,45,2,17,true,false,0,false,false,26,0,0,1,false,false,false,,,,,,,,,[]
http://www.blogger.com/?tab=wj,1,,200,5,false,false,false,30,2,15,true,false,0,false,true,16,0,0,2,false,false,false,,,,,,,,,[]
https://docs.google.com/document/?usp=docs_alc,1,,200,2,true,false,false,46,2,15,true,false,0,false,true,69,0,1,7,false,false,false,,,,,,,,,[]
http://google.com/intl/fr/ads/,1,,200,2,false,false,false,30,1,10,false,false,0,false,false,56,0,0,1,false,false,false,,,,,,,,,[]
http://google.com/intl/fr/policies/terms/,1,,200,1,false,false,false,41,1,10,false,false,0,false,true,1,0,0,0,false,false,false,,,,,,,,,[]
https://www.google.fr/webhp?tab=ww,1,,200,0,true,false,false,34,2,13,true,false,0,false,true,29,1,0,0,true,false,false,,,,,,,,,[]
http://www.google.fr/imghp?hl=fr&tab=wi,1,,200,0,false,false,false,39,2,13,true,false,0,false,true,28,1,0,0,true,false,false,,,,,,,,,[]
https://accounts.google.com/ServiceLogin?hl=fr&passive=true&continue=http://www.google.com/&ec=GAZAAQ,1,,200,2,true,false,false,101,2,19,true,false,0,false,false,6,2,0,0,true,false,true,,,,,,,,,[]
http://www.google.fr/preferences?hl=fr,1,,200,1,false,false,false,38,2,13,true,false,0,false,true,24,1,0,3,true,false,true,,,,,,,,,[]
http://www.google.com/setprefdomain?prefdom=FR&prev=http://www.google.fr/&sig=6480af66K_LdnPi_AYMkuiEk7XuA_P7kSeYx4%3D,1,,200,1,false,false,false,118,2,14,true,false,0,false,true,29,1,0,0,true,false,false,,,,,,,,,[]
https://mail.google.com/mail/?tab=wm,1,,200,4,true,false,false,36,2,15,true,false,0,false,false,6,2,0,0,true,false,true,,,,,,,,,[]
http://www.google.fr/history/optout?hl=fr,1,,200,4,false,false,false,41,2,13,true,false,0,false,false,8,0,0,0,false,false,true,,,,,,,,,[]
http://google.com/services/,1,,200,1,false,false,false,27,1,10,false,false,0,true,true,1,0,0,0,false,false,false,,,,,,,,,[]
https://drive.google.com/?tab=wo,1,,200,3,true,false,false,32,2,16,true,false,0,false,false,6,2,0,0,true,false,true,,,,,,,,,[]
https://calendar.google.com/calendar?tab=wc,1,,200,4,true,false,false,43,2,19,true,false,0,false,false,6,2,0,0,true,false,true,,,,,,,,,[]
http://google.com/preferences?hl=fr,1,,200,2,false,false,false,35,1,10,false,false,0,false,true,24,1,0,3,true,false,true,,,,,,,,,[]
https://www.google.fr/intl/fr/about/products?tab=wh,1,,200,3,true,false,false,51,2,13,true,false,0,false,true,158,0,0,3,false,false,false,,,,,,,,,[]
http://google.com/advanced_search?hl=fr&authuser=0,1,,200,2,false,false,false,50,1,10,false,false,0,false,true,16,1,0,0,true,false,true,,,,,,,,,[]
http://maps.google.fr/maps?hl=fr&tab=wl,1,,200,3,false,false,false,39,2,14,true,false,0,false,true,0,0,0,0,false,false,false,,,,,,,,,[]
http://google.com/intl/fr/about.html,1,,200,4,false,false,false,36,1,10,false,false,0,false,true,49,0,0,1,false,false,false,,,,,,,,,[]
https://www.youtube.com/?tab=w1,1,,200,0,true,false,false,31,2,15,true,false,0,false,true,15,0,0,0,false,true,true,,,,,,,,,[]
https://www.google.fr/shopping?hl=fr&source=og&tab=wf,1,,200,1,true,false,false,53,2,13,true,false,0,true,false,32,1,0,0,true,false,true,,,,,,,,,[]
https://www.youtube.com/creators/,1,,200,0,true,false,false,33,2,15,true,false,0,false,true,43,1,0,1,false,false,false,,,,,,,,,[]
https://youtube.com/t/privacy,1,,200,2,true,false,false,29,1,11,false,false,0,false,false,252,0,0,0,false,true,true,,,,,,,,,[]
https://developers.google.com/youtube,1,,200,0,true,false,false,37,2,21,true,false,0,true,false,102,1,0,0,true,false,false,,,,,,,,,[]
https://www.youtube.com/about/policies/,1,,200,1,true,false,false,39,2,15,true,false,0,false,false,138,1,0,7,false,false,false,,,,,,,,,[]
https://www.youtube.com/about/press/,1,,200,1,true,false,false,36,2,15,true,false,0,false,true,89,1,0,6,false,false,false,,,,,,,,,[]
https://www.youtube.com/ads/,1,,200,0,true,false,false,28,2,15,true,false,0,true,false,85,0,0,1,false,false,false,,,,,,,,,[]
https://youtube.com/new,1,,200,1,true,false,false,23,1,11,false,false,0,true,true,15,0,0,0,false,true,true,,,,,,,,,[]
https://accounts.google.com/ServiceLogin?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Dfr%26next%3D%252Fsignin_passive%26feature%3Dpassive&hl=fr,1,,200,2,true,false,false,236,2,19,true,false,0,false,false,6,2,0,0,true,false,true,,,,,,,,,[]
https://www.youtube.com/about/copyright/,1,,200,1,true,false,false,40,2,15,true,false,0,false,false,118,1,0,5,false,false,false,,,,,,,,,[]
https://www.youtube.com/howyoutubeworks?utm_campaign=ytgen&utm_source=ythp&utm_medium=LeftNav&utm_content=txt&u=https%3A%2F%2Fwww.youtube.com%2Fhowyoutubeworks%3Futm_source%3Dythp%26utm_medium%3DLeftNav%26utm_campaign%3Dytgen,1,,200,1,true,false,false,225,2,15,true,false,0,false,false,141,1,0,1,false,false,false,,,,,,,,,[]
https://www.youtube.com/about/,1,,200,1,true,false,false,30,2,15,true,false,0,false,false,68,0,0,1,false,false,false,,,,,,,,,[]
https://youtube.com/t/contact_us/,1,,200,1,true,false,false,33,1,11,false,false,0,false,true,17,0,0,0,false,false,false,,,,,,,,,[]
https://youtube.com/t/terms,1,,200,1,true,false,false,27,1,11,false,false,0,false,true,54,0,0,0,false,false,false,,,,,,,,,[]
https://l.facebook.com/l.php?u=https%3A%2F%2Fwww.mozilla.org%2Ffirefox%2Fnew%2F%3Futm_source%3Dfacebook%26utm_medium%3Dreferral%26utm_campaign%3Dunsupported-browser-notification&h=AT0tVHDvvfnBmci8zDFRsQhyrcA6C7QPlpYNZwFyoH5SKC-86VQ1SoqIzMkXo_l2c-WXFMxqR9b9uVeW07IUgLlSOuVQbV6b769OQK-15UjYWYral3uussh7AKRZXVm8-uQqt2wPkLMYC2HD,1,,200,1,true,false,false,324,2,14,true,false,0,false,true,3,0,0,0,false,false,true,,,,,,,,,[]
http://facebook.com/mobile,1,,200,3,false,false,false,26,1,12,false,false,0,true,true,3,0,0,0,false,false,true,,,,,,,,,[]
https://l.facebook.com/l.php?u=https%3A%2F%2Fwww.google.com%2Fchrome%2Fbrowser%2F&h=AT3T6nJvXHDrlACT1f0OrzwgAULBGZthdbArV4nNofsvVd-hBebct8zU9_qlXv1qDuQtsqekQTCmu0Prc3y_4ILgwKoQ2m7s8rhr5HgGZc0ymgMwfSCS6h6KgKf61p1oTSMUKwWSdW_MGWIC,1,,200,1,true,false,false,228,2,14,true,false,0,false,true,3,0,0,0,false,false,true,,,,,,,,,[]
Binary file added manifest.xpi.zip
Binary file not shown.
1 change: 1 addition & 0 deletions mqtt
Submodule mqtt added at 713663
3 changes: 2 additions & 1 deletion phishGNN/cross_validation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import time
from typing import Tuple

import torch
from sklearn.model_selection import StratifiedKFold
Expand All @@ -15,7 +16,7 @@

def cross_validation_with_val_set(dataset, model, loss_fn, folds, epochs, batch_size,
lr, lr_decay_factor, lr_decay_step_size,
weight_decay, logger=None) -> tuple[float, float, float]:
weight_decay, logger=None) -> Tuple[float, float, float]:

val_losses, accs, durations = [], [], []
for fold, (train_idx, test_idx, val_idx) in enumerate(zip(*k_fold(dataset, folds))):
Expand Down
15 changes: 8 additions & 7 deletions phishGNN/dataprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
NAN_VALUE = -1


def read_csv(path: str) -> pd.DataFrame:
def read_csv(path: str, train_test_equilibrum: bool=True) -> pd.DataFrame:
"""Opens the csv dataset as DataFrame and cast types.
"""
date_parser = lambda c: pd.to_datetime(c, format='%Y-%m-%dT%H:%M:%SZ', errors='coerce')
Expand All @@ -22,10 +22,11 @@ def read_csv(path: str) -> pd.DataFrame:
)

# equilibrate dataset classes as 50/50% benign/phishing
nb_phishing = len(df[df['is_phishing'] == 1])
benign = df.index[(df['is_phishing'] == 0)][:nb_phishing]
other = df.index[~(df['is_phishing'] == 0)]
df = pd.concat([df.iloc[benign], df.iloc[other]])
if train_test_equilibrum:
nb_phishing = len(df[df['is_phishing'] == 1])
benign = df.index[(df['is_phishing'] == 0)][:nb_phishing]
other = df.index[~(df['is_phishing'] == 0)]
df = pd.concat([df.iloc[benign], df.iloc[other]])

# cast object dtypes
df['url'] = df['url'].astype('string')
Expand Down Expand Up @@ -115,7 +116,7 @@ def load_every_urls_with_features(df: pd.DataFrame, path: str) -> Tuple[List, Li
return every_urls, X


def load_train_set(csv_file: str) -> Tuple[pd.DataFrame, List[List], List[int]]:
def load_train_set(csv_file: str, train_test_equilibrum: bool=True) -> Tuple[pd.DataFrame, List[List], List[int]]:
"""Opens the csv file in `csv_file` and returns every
features and label of each root url in the dataset.
Expand All @@ -124,7 +125,7 @@ def load_train_set(csv_file: str) -> Tuple[pd.DataFrame, List[List], List[int]]:
X: the list of features (list) of each root url
y: the list of labels (int) of each root url
"""
df = read_csv(csv_file)
df = read_csv(csv_file, train_test_equilibrum=train_test_equilibrum)
df = normalize_features(df)

root_urls = df[~df['is_phishing'].isin([NAN_VALUE])]['url']
Expand Down
7 changes: 4 additions & 3 deletions phishGNN/dataset_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import dataprep
from utils.compute_device import COMPUTE_DEVICE
from utils.utils import normalize_www_prefix
from typing import Tuple, List

print(f'Torch version: {torch.__version__}')
print(f'Compute device: {COMPUTE_DEVICE}')
Expand Down Expand Up @@ -42,12 +43,12 @@ def __init__(
super(PhishingDataset, self).__init__(root, transform, pre_transform)

@property
def raw_file_names(self) -> list[str]:
def raw_file_names(self) -> List[str]:
"""File name of the csv dataset. """
return glob.glob(os.path.join(self.raw_dir, '*'))

@property
def processed_file_names(self) -> list[str]:
def processed_file_names(self) -> List[str]:
return [file + '.pt' for file in self.raw_file_names]

@property
Expand Down Expand Up @@ -89,7 +90,7 @@ def process(self) -> None:
def len(self):
return (len(os.listdir(self.processed_dir)) - 4) // 2

def _build_tensors(self, root_url: str, df_to_dict, existing_urls) -> tuple[Tensor, Tensor, Tensor, Tensor, dict]:
def _build_tensors(self, root_url: str, df_to_dict, existing_urls) -> Tuple[Tensor, Tensor, Tensor, Tensor, dict]:
"""Builds the required tensors for one graph.
These matrices will be then used for training the GNN.
Expand Down
7 changes: 4 additions & 3 deletions phishGNN/dataset_v2.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import glob
import os
from typing import Tuple, List

import pandas as pd
import torch
Expand Down Expand Up @@ -43,12 +44,12 @@ def __init__(
super(PhishingDataset2, self).__init__(root, transform, pre_transform)

@property
def raw_file_names(self) -> list[str]:
def raw_file_names(self) -> List[str]:
"""File name of the csv dataset. """
return glob.glob(os.path.join(self.raw_dir, '*'))

@property
def processed_file_names(self) -> list[str]:
def processed_file_names(self) -> List[str]:
return [file + '.pt' for file in self.raw_file_names]

@property
Expand Down Expand Up @@ -104,7 +105,7 @@ def process(self) -> None:
def len(self):
return (len(os.listdir(self.processed_dir)) - 4) // 2

def _build_tensors(self, root_url: str, df_to_dict, existing_urls) -> tuple[Tensor, Tensor, Tensor, Tensor, dict]:
def _build_tensors(self, root_url: str, df_to_dict, existing_urls) -> Tuple[Tensor, Tensor, Tensor, Tensor, dict]:
"""Builds the required tensors for one graph.
These matrices will be then used for training the GNN.
Expand Down
Loading

0 comments on commit 14ef5f8

Please sign in to comment.