From 18f35f00217d67b48b67a23bff049171f73318fe Mon Sep 17 00:00:00 2001
From: Tristan
Date: Wed, 7 Jun 2023 03:19:50 -0400
Subject: [PATCH] Revert "dataset v3 try to do inference"

This reverts commit 14ef5f8d1b52e2dc6126e6035b7befe5b682656f.
---
 crawler/input.txt | 1 -
 data/predict/raw/dataset.csv | 47 ---------
 manifest.xpi.zip | Bin 899 -> 0 bytes
 mqtt | 1 -
 phishGNN/cross_validation.py | 3 +-
 phishGNN/dataprep.py | 15 ++-
 phishGNN/dataset_v1.py | 7 +-
 phishGNN/dataset_v2.py | 7 +-
 phishGNN/dataset_v3.py | 192 -----------------------------------
 phishGNN/other_models.py | 2 +-
 phishGNN/predict.py | 16 +--
 phishGNN/utils/utils.py | 4 +-
 12 files changed, 25 insertions(+), 270 deletions(-)
 delete mode 100644 crawler/input.txt
 delete mode 100644 data/predict/raw/dataset.csv
 delete mode 100644 manifest.xpi.zip
 delete mode 160000 mqtt
 delete mode 100644 phishGNN/dataset_v3.py

diff --git a/crawler/input.txt b/crawler/input.txt
deleted file mode 100644
index 62ef4dc..0000000
--- a/crawler/input.txt
+++ /dev/null
@@ -1 +0,0 @@
-https://stackoverflow.com https://youtube.com http://br-icloud.com.br http://mp3raid.com/music/krizz_kaliko.html http://www.garage-pirenne.be/index.php?option=com_content&view=article&id=70&vsig70_0=15 http://www.pashminaonline.com/pure-pashminas http://google.com http://facebook.com http://twitter.com
diff --git a/data/predict/raw/dataset.csv b/data/predict/raw/dataset.csv
deleted file mode 100644
index 09f129e..0000000
--- a/data/predict/raw/dataset.csv
+++ /dev/null
@@ -1,47 +0,0 @@
-url,depth,is_phishing,status_code,redirects,is_https,is_ip_address,is_error_page,url_length,domain_url_depth,domain_url_length,has_sub_domain,has_at_symbol,dashes_count,path_starts_with_url,is_valid_html,anchors_count,forms_count,javascript_count,self_anchors_count,has_form_with_url,has_iframe,use_mouseover,is_cert_valid,has_dns_record,has_whois,cert_country,cert_reliability,domain_age,domain_end_period,domain_creation_date,refs
-http://google.com/,0,false,200,1,false,false,false,18,1,10,false,false,0,false,true,29,1,0,0,true,false,false,,,,,,,,,"[{""url"":""https://www.google.fr/webhp?tab=ww"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://www.google.fr/imghp?hl=fr&tab=wi"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://maps.google.fr/maps?hl=fr&tab=wl"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://play.google.com/?hl=fr&tab=w8"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/?tab=w1"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://news.google.com/?tab=wn"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://mail.google.com/mail/?tab=wm"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://drive.google.com/?tab=wo"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.google.fr/intl/fr/about/products?tab=wh"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://calendar.google.com/calendar?tab=wc"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://translate.google.fr/?hl=fr&tab=wT"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://books.google.fr/?hl=fr&tab=wp"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.google.fr/shopping?hl=fr&source=og&tab=wf"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://www.blogger.com/?tab=wj"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.google.com/finance?tab=we"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://photos.google.com/?tab=wq&pageId=none"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://docs.google.com/document/?usp=docs_alc"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.google.fr/intl/fr/about/products?tab=wh"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://accounts.google.com/ServiceLogin?hl=fr&passive=true&continue=http://www.google.com/&ec=GAZAAQ"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://www.google.fr/preferences?hl=fr"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/preferences?hl=fr"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://www.google.fr/history/optout?hl=fr"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/search"",""is_same_domain"":true,""is_anchor"":false,""is_form"":true,""is_iframe"":false},{""url"":""http://google.com/advanced_search?hl=fr&authuser=0"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/intl/fr/ads/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe
"":false},{""url"":""http://google.com/services/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/intl/fr/about.html"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://www.google.com/setprefdomain?prefdom=FR&prev=http://www.google.fr/&sig=6480af66K_LdnPi_AYMkuiEk7XuA_P7kSeYx4%3D"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/intl/fr/policies/privacy/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://google.com/intl/fr/policies/terms/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false}]" -http://facebook.com/,0,false,200,3,false,false,false,20,1,12,false,false,0,false,true,3,0,0,0,false,false,true,,,,,,,,,"[{""url"":""https://l.facebook.com/l.php?u=https%3A%2F%2Fwww.google.com%2Fchrome%2Fbrowser%2F&h=AT3T6nJvXHDrlACT1f0OrzwgAULBGZthdbArV4nNofsvVd-hBebct8zU9_qlXv1qDuQtsqekQTCmu0Prc3y_4ILgwKoQ2m7s8rhr5HgGZc0ymgMwfSCS6h6KgKf61p1oTSMUKwWSdW_MGWIC"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://l.facebook.com/l.php?u=https%3A%2F%2Fwww.mozilla.org%2Ffirefox%2Fnew%2F%3Futm_source%3Dfacebook%26utm_medium%3Dreferral%26utm_campaign%3Dunsupported-browser-notification&h=AT0tVHDvvfnBmci8zDFRsQhyrcA6C7QPlpYNZwFyoH5SKC-86VQ1SoqIzMkXo_l2c-WXFMxqR9b9uVeW07IUgLlSOuVQbV6b769OQK-15UjYWYral3uussh7AKRZXVm8-uQqt2wPkLMYC2HD"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""http://facebook.com/mobile"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false}]" -https://youtube.com/,0,false,200,1,true,false,false,20,1,11,false,false,0,false,true,15,0,0,0,false,true,true,,,,,,,,,"[{""url"":""https://accounts.google.com/ServiceLogin?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Dfr%26next%3D%252Fsignin_passive%26feature%3Dpassive&hl=fr"",""is_same_domain"":false,""is_anchor"":false,""is_form"":false,""is_iframe"":true},{""url"":""https://youtube.com/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://youtube.com/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/about/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/about/press/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/about/copyright/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://youtube.com/t/contact_us/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/creators/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/ads/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://developers.google.com/youtube"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://support.google.com/youtube/contact/FR_Complaints"",""is_same_domain"":false,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://youtube.com/t/terms"",""is_same_domain""
:true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://youtube.com/t/privacy"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/about/policies/"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://www.youtube.com/howyoutubeworks?utm_campaign=ytgen&utm_source=ythp&utm_medium=LeftNav&utm_content=txt&u=https%3A%2F%2Fwww.youtube.com%2Fhowyoutubeworks%3Futm_source%3Dythp%26utm_medium%3DLeftNav%26utm_campaign%3Dytgen"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false},{""url"":""https://youtube.com/new"",""is_same_domain"":true,""is_anchor"":true,""is_form"":false,""is_iframe"":false}]" -https://stackoverflow.com/,0,false,403,0,true,false,true,26,1,17,false,false,0,false,true,1,0,0,0,false,false,false,,,,,,,,,[] -http://mp3raid.com/music/krizz_kaliko.html,0,false,403,0,false,false,true,42,1,11,false,false,0,true,false,0,0,0,0,false,false,false,,,,,,,,,[] -http://google.com/search,1,,200,2,false,false,false,24,1,10,false,false,0,true,true,29,1,0,0,true,false,false,,,,,,,,,[] -http://google.com/intl/fr/policies/privacy/,1,,200,1,false,false,false,43,1,10,false,false,0,false,true,1,0,0,0,false,false,false,,,,,,,,,[] -https://books.google.fr/?hl=fr&tab=wp,1,,200,0,true,false,false,37,2,15,true,false,0,false,false,26,1,0,0,true,false,true,,,,,,,,,[] -https://photos.google.com/?tab=wq&pageId=none,1,,200,1,true,false,false,45,2,17,true,false,0,false,false,26,0,0,1,false,false,false,,,,,,,,,[] -http://www.blogger.com/?tab=wj,1,,200,5,false,false,false,30,2,15,true,false,0,false,true,16,0,0,2,false,false,false,,,,,,,,,[] -https://docs.google.com/document/?usp=docs_alc,1,,200,2,true,false,false,46,2,15,true,false,0,false,true,69,0,1,7,false,false,false,,,,,,,,,[] -http://google.com/intl/fr/ads/,1,,200,2,false,false,false,30,1,10,false,false,0,false,false,56,0,0,1,false,false,false,,,,,,,,,[] -http://google.com/intl/fr/policies/terms/,1,,200,1,false,false,false,41,1,10,false,false,0,false,true,1,0,0,0,false,false,false,,,,,,,,,[] -https://www.google.fr/webhp?tab=ww,1,,200,0,true,false,false,34,2,13,true,false,0,false,true,29,1,0,0,true,false,false,,,,,,,,,[] -http://www.google.fr/imghp?hl=fr&tab=wi,1,,200,0,false,false,false,39,2,13,true,false,0,false,true,28,1,0,0,true,false,false,,,,,,,,,[] -https://accounts.google.com/ServiceLogin?hl=fr&passive=true&continue=http://www.google.com/&ec=GAZAAQ,1,,200,2,true,false,false,101,2,19,true,false,0,false,false,6,2,0,0,true,false,true,,,,,,,,,[] -http://www.google.fr/preferences?hl=fr,1,,200,1,false,false,false,38,2,13,true,false,0,false,true,24,1,0,3,true,false,true,,,,,,,,,[] -http://www.google.com/setprefdomain?prefdom=FR&prev=http://www.google.fr/&sig=6480af66K_LdnPi_AYMkuiEk7XuA_P7kSeYx4%3D,1,,200,1,false,false,false,118,2,14,true,false,0,false,true,29,1,0,0,true,false,false,,,,,,,,,[] -https://mail.google.com/mail/?tab=wm,1,,200,4,true,false,false,36,2,15,true,false,0,false,false,6,2,0,0,true,false,true,,,,,,,,,[] -http://www.google.fr/history/optout?hl=fr,1,,200,4,false,false,false,41,2,13,true,false,0,false,false,8,0,0,0,false,false,true,,,,,,,,,[] -http://google.com/services/,1,,200,1,false,false,false,27,1,10,false,false,0,true,true,1,0,0,0,false,false,false,,,,,,,,,[] -https://drive.google.com/?tab=wo,1,,200,3,true,false,false,32,2,16,true,false,0,false,false,6,2,0,0,true,false,true,,,,,,,,,[] 
-https://calendar.google.com/calendar?tab=wc,1,,200,4,true,false,false,43,2,19,true,false,0,false,false,6,2,0,0,true,false,true,,,,,,,,,[] -http://google.com/preferences?hl=fr,1,,200,2,false,false,false,35,1,10,false,false,0,false,true,24,1,0,3,true,false,true,,,,,,,,,[] -https://www.google.fr/intl/fr/about/products?tab=wh,1,,200,3,true,false,false,51,2,13,true,false,0,false,true,158,0,0,3,false,false,false,,,,,,,,,[] -http://google.com/advanced_search?hl=fr&authuser=0,1,,200,2,false,false,false,50,1,10,false,false,0,false,true,16,1,0,0,true,false,true,,,,,,,,,[] -http://maps.google.fr/maps?hl=fr&tab=wl,1,,200,3,false,false,false,39,2,14,true,false,0,false,true,0,0,0,0,false,false,false,,,,,,,,,[] -http://google.com/intl/fr/about.html,1,,200,4,false,false,false,36,1,10,false,false,0,false,true,49,0,0,1,false,false,false,,,,,,,,,[] -https://www.youtube.com/?tab=w1,1,,200,0,true,false,false,31,2,15,true,false,0,false,true,15,0,0,0,false,true,true,,,,,,,,,[] -https://www.google.fr/shopping?hl=fr&source=og&tab=wf,1,,200,1,true,false,false,53,2,13,true,false,0,true,false,32,1,0,0,true,false,true,,,,,,,,,[] -https://www.youtube.com/creators/,1,,200,0,true,false,false,33,2,15,true,false,0,false,true,43,1,0,1,false,false,false,,,,,,,,,[] -https://youtube.com/t/privacy,1,,200,2,true,false,false,29,1,11,false,false,0,false,false,252,0,0,0,false,true,true,,,,,,,,,[] -https://developers.google.com/youtube,1,,200,0,true,false,false,37,2,21,true,false,0,true,false,102,1,0,0,true,false,false,,,,,,,,,[] -https://www.youtube.com/about/policies/,1,,200,1,true,false,false,39,2,15,true,false,0,false,false,138,1,0,7,false,false,false,,,,,,,,,[] -https://www.youtube.com/about/press/,1,,200,1,true,false,false,36,2,15,true,false,0,false,true,89,1,0,6,false,false,false,,,,,,,,,[] -https://www.youtube.com/ads/,1,,200,0,true,false,false,28,2,15,true,false,0,true,false,85,0,0,1,false,false,false,,,,,,,,,[] -https://youtube.com/new,1,,200,1,true,false,false,23,1,11,false,false,0,true,true,15,0,0,0,false,true,true,,,,,,,,,[] -https://accounts.google.com/ServiceLogin?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Dfr%26next%3D%252Fsignin_passive%26feature%3Dpassive&hl=fr,1,,200,2,true,false,false,236,2,19,true,false,0,false,false,6,2,0,0,true,false,true,,,,,,,,,[] -https://www.youtube.com/about/copyright/,1,,200,1,true,false,false,40,2,15,true,false,0,false,false,118,1,0,5,false,false,false,,,,,,,,,[] -https://www.youtube.com/howyoutubeworks?utm_campaign=ytgen&utm_source=ythp&utm_medium=LeftNav&utm_content=txt&u=https%3A%2F%2Fwww.youtube.com%2Fhowyoutubeworks%3Futm_source%3Dythp%26utm_medium%3DLeftNav%26utm_campaign%3Dytgen,1,,200,1,true,false,false,225,2,15,true,false,0,false,false,141,1,0,1,false,false,false,,,,,,,,,[] -https://www.youtube.com/about/,1,,200,1,true,false,false,30,2,15,true,false,0,false,false,68,0,0,1,false,false,false,,,,,,,,,[] -https://youtube.com/t/contact_us/,1,,200,1,true,false,false,33,1,11,false,false,0,false,true,17,0,0,0,false,false,false,,,,,,,,,[] -https://youtube.com/t/terms,1,,200,1,true,false,false,27,1,11,false,false,0,false,true,54,0,0,0,false,false,false,,,,,,,,,[] 
-https://l.facebook.com/l.php?u=https%3A%2F%2Fwww.mozilla.org%2Ffirefox%2Fnew%2F%3Futm_source%3Dfacebook%26utm_medium%3Dreferral%26utm_campaign%3Dunsupported-browser-notification&h=AT0tVHDvvfnBmci8zDFRsQhyrcA6C7QPlpYNZwFyoH5SKC-86VQ1SoqIzMkXo_l2c-WXFMxqR9b9uVeW07IUgLlSOuVQbV6b769OQK-15UjYWYral3uussh7AKRZXVm8-uQqt2wPkLMYC2HD,1,,200,1,true,false,false,324,2,14,true,false,0,false,true,3,0,0,0,false,false,true,,,,,,,,,[] -http://facebook.com/mobile,1,,200,3,false,false,false,26,1,12,false,false,0,true,true,3,0,0,0,false,false,true,,,,,,,,,[] -https://l.facebook.com/l.php?u=https%3A%2F%2Fwww.google.com%2Fchrome%2Fbrowser%2F&h=AT3T6nJvXHDrlACT1f0OrzwgAULBGZthdbArV4nNofsvVd-hBebct8zU9_qlXv1qDuQtsqekQTCmu0Prc3y_4ILgwKoQ2m7s8rhr5HgGZc0ymgMwfSCS6h6KgKf61p1oTSMUKwWSdW_MGWIC,1,,200,1,true,false,false,228,2,14,true,false,0,false,true,3,0,0,0,false,false,true,,,,,,,,,[] diff --git a/manifest.xpi.zip b/manifest.xpi.zip deleted file mode 100644 index 8f329c0304a78e5733b38ca7b5f8965d4d510828..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 899 zcmWIWW@Zs#-~htAc~M~uP|(7}z#zn+z>u4mmzkDYT%wSiUr?!+Rh*v}8p6xKzBuf6 za{1@F6c8@0;AUWC`O3(^05pezAvN%H-faVcJ>oCybxI5cOo}cqj{P!0pW~MUf1_f+ zms@eOl_K9=auj&deCPg${#NZF3jvrxVRFiy~`#JNAWY6-@ zmzOmc&Z=E0n$H&Untl59Cl8N*Oej{elzU+QP5tZc6+5%-=Q~KB5{R=p7Wm=L(FZ5& zv^Bp(Zz(!`@=(jgwQ9vPQqHHfZhs;>wb5Lu&}{C#9`S3pQ{pD?2s*guM#$!lOoeNT zJ0xth!Y+KT(RwTYV$O}FU#?nQfBNLdZC0bOjDCCH`x+ZGHrOq_AiX`1*u>;1BBvXkeNk!K^MI z%~+@|(EMY!WCv59qM~Dfn#XY-g)q0^LmdL*zUR(g_14ii;dxbC$5+SejP}Vh-l-Z_ z%nddeY%()8H1;&{FxkX+(xZ2!ff2`9Zym35Cv?1ZH>H6*!p_lc@pnre(CHxO2Y53w zi7+G5D6*$PX%q&wG=f+Je1|2)qMMIwHz?>}U`yjkMh40PGQgXa4P+b>5S|9oi-7UQ GzyJUg%S<}} diff --git a/mqtt b/mqtt deleted file mode 160000 index 7136637..0000000 --- a/mqtt +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7136637bd047ecefd4aa97839ad2cea42008e3f7 diff --git a/phishGNN/cross_validation.py b/phishGNN/cross_validation.py index 24a9117..2d01498 100644 --- a/phishGNN/cross_validation.py +++ b/phishGNN/cross_validation.py @@ -1,5 +1,4 @@ import time -from typing import Tuple import torch from sklearn.model_selection import StratifiedKFold @@ -16,7 +15,7 @@ def cross_validation_with_val_set(dataset, model, loss_fn, folds, epochs, batch_size, lr, lr_decay_factor, lr_decay_step_size, - weight_decay, logger=None) -> Tuple[float, float, float]: + weight_decay, logger=None) -> tuple[float, float, float]: val_losses, accs, durations = [], [], [] for fold, (train_idx, test_idx, val_idx) in enumerate(zip(*k_fold(dataset, folds))): diff --git a/phishGNN/dataprep.py b/phishGNN/dataprep.py index 9c6e55c..f3224e8 100644 --- a/phishGNN/dataprep.py +++ b/phishGNN/dataprep.py @@ -10,7 +10,7 @@ NAN_VALUE = -1 -def read_csv(path: str, train_test_equilibrum: bool=True) -> pd.DataFrame: +def read_csv(path: str) -> pd.DataFrame: """Opens the csv dataset as DataFrame and cast types. 
""" date_parser = lambda c: pd.to_datetime(c, format='%Y-%m-%dT%H:%M:%SZ', errors='coerce') @@ -22,11 +22,10 @@ def read_csv(path: str, train_test_equilibrum: bool=True) -> pd.DataFrame: ) # equilibrate dataset classes as 50/50% benign/phishing - if train_test_equilibrum: - nb_phishing = len(df[df['is_phishing'] == 1]) - benign = df.index[(df['is_phishing'] == 0)][:nb_phishing] - other = df.index[~(df['is_phishing'] == 0)] - df = pd.concat([df.iloc[benign], df.iloc[other]]) + nb_phishing = len(df[df['is_phishing'] == 1]) + benign = df.index[(df['is_phishing'] == 0)][:nb_phishing] + other = df.index[~(df['is_phishing'] == 0)] + df = pd.concat([df.iloc[benign], df.iloc[other]]) # cast object dtypes df['url'] = df['url'].astype('string') @@ -116,7 +115,7 @@ def load_every_urls_with_features(df: pd.DataFrame, path: str) -> Tuple[List, Li return every_urls, X -def load_train_set(csv_file: str, train_test_equilibrum: bool=True) -> Tuple[pd.DataFrame, List[List], List[int]]: +def load_train_set(csv_file: str) -> Tuple[pd.DataFrame, List[List], List[int]]: """Opens the csv file in `csv_file` and returns every features and label of each root url in the dataset. @@ -125,7 +124,7 @@ def load_train_set(csv_file: str, train_test_equilibrum: bool=True) -> Tuple[pd. X: the list of features (list) of each root url y: the list of labels (int) of each root url """ - df = read_csv(csv_file, train_test_equilibrum=train_test_equilibrum) + df = read_csv(csv_file) df = normalize_features(df) root_urls = df[~df['is_phishing'].isin([NAN_VALUE])]['url'] diff --git a/phishGNN/dataset_v1.py b/phishGNN/dataset_v1.py index 70c8cec..c91f942 100644 --- a/phishGNN/dataset_v1.py +++ b/phishGNN/dataset_v1.py @@ -11,7 +11,6 @@ import dataprep from utils.compute_device import COMPUTE_DEVICE from utils.utils import normalize_www_prefix -from typing import Tuple, List print(f'Torch version: {torch.__version__}') print(f'Compute device: {COMPUTE_DEVICE}') @@ -43,12 +42,12 @@ def __init__( super(PhishingDataset, self).__init__(root, transform, pre_transform) @property - def raw_file_names(self) -> List[str]: + def raw_file_names(self) -> list[str]: """File name of the csv dataset. """ return glob.glob(os.path.join(self.raw_dir, '*')) @property - def processed_file_names(self) -> List[str]: + def processed_file_names(self) -> list[str]: return [file + '.pt' for file in self.raw_file_names] @property @@ -90,7 +89,7 @@ def process(self) -> None: def len(self): return (len(os.listdir(self.processed_dir)) - 4) // 2 - def _build_tensors(self, root_url: str, df_to_dict, existing_urls) -> Tuple[Tensor, Tensor, Tensor, Tensor, dict]: + def _build_tensors(self, root_url: str, df_to_dict, existing_urls) -> tuple[Tensor, Tensor, Tensor, Tensor, dict]: """Builds the required tensors for one graph. These matrices will be then used for training the GNN. diff --git a/phishGNN/dataset_v2.py b/phishGNN/dataset_v2.py index 6976a9c..7c501a8 100644 --- a/phishGNN/dataset_v2.py +++ b/phishGNN/dataset_v2.py @@ -1,6 +1,5 @@ import glob import os -from typing import Tuple, List import pandas as pd import torch @@ -44,12 +43,12 @@ def __init__( super(PhishingDataset2, self).__init__(root, transform, pre_transform) @property - def raw_file_names(self) -> List[str]: + def raw_file_names(self) -> list[str]: """File name of the csv dataset. 
""" return glob.glob(os.path.join(self.raw_dir, '*')) @property - def processed_file_names(self) -> List[str]: + def processed_file_names(self) -> list[str]: return [file + '.pt' for file in self.raw_file_names] @property @@ -105,7 +104,7 @@ def process(self) -> None: def len(self): return (len(os.listdir(self.processed_dir)) - 4) // 2 - def _build_tensors(self, root_url: str, df_to_dict, existing_urls) -> Tuple[Tensor, Tensor, Tensor, Tensor, dict]: + def _build_tensors(self, root_url: str, df_to_dict, existing_urls) -> tuple[Tensor, Tensor, Tensor, Tensor, dict]: """Builds the required tensors for one graph. These matrices will be then used for training the GNN. diff --git a/phishGNN/dataset_v3.py b/phishGNN/dataset_v3.py deleted file mode 100644 index a989886..0000000 --- a/phishGNN/dataset_v3.py +++ /dev/null @@ -1,192 +0,0 @@ -import glob -import os -from typing import Tuple, List - -import pandas as pd -import torch -import torch_geometric -from torch import Tensor -from sklearn.model_selection import train_test_split -from torch_geometric.data import Data, Dataset -from tqdm import tqdm - -import dataprep -from other_models import train_random_forest -from utils.compute_device import COMPUTE_DEVICE - -print(f'Torch version: {torch.__version__}') -print(f'Compute device: {COMPUTE_DEVICE}') -print(f'Torch geometric version: {torch_geometric.__version__}') - -# set default dtype, as MPS Pytorch does not support float64 -torch.set_default_dtype(torch.float32) - - -class PhishingDataset3(Dataset): - """Dataset containing both phishing and non-phishing website urls. """ - - def __init__( - self, - root: str, - do_data_preparation: bool = True, - visualization_mode: bool = False, - nan_value: float = -1.0, - transform=None, - pre_transform=None, - ): - """ - root = Where the dataset should be stored. This folder is split - into raw_dir (downloaded dataset) and processed_dir (processed data). - """ - self.do_data_preparation = do_data_preparation - self.visualization_mode = visualization_mode - self.nan_value = nan_value - super(PhishingDataset3, self).__init__(root, transform, pre_transform) - - @property - def raw_file_names(self) -> List[str]: - """File name of the csv dataset. """ - return glob.glob(os.path.join(self.raw_dir, '*')) - - @property - def processed_file_names(self) -> List[str]: - return [file + '.pt' for file in self.raw_file_names] - - @property - def num_classes(self): - return 2 - - def file_name(self, idx: int) -> str: - if self.visualization_mode: - return f'data_viz_{idx}.pt' - return f'data_{idx}.pt' - - def process(self) -> None: - """Reads csv files in data/raw and preprocess so that output - preprocessed files are written in data/processed folder. 
- """ - if not self.do_data_preparation: - return - - # loop over all files in `raw_file_names` - for raw_path in self.raw_paths: - df, X, y = dataprep.load_train_set(raw_path, train_test_equilibrum=False) - - forest, _ = train_random_forest(X, X, y, y) - - every_urls, every_features = dataprep.load_every_urls_with_features(df, raw_path) - every_preds = forest.predict(every_features) - - root_urls = df[~df['is_phishing'].isin([self.nan_value])]['url'] - - df.drop(df.iloc[:, 2:-1], inplace=True, axis=1) - df['url']: every_urls - df['is_phishing_pred'] = every_preds - - df = df.set_index('url') - df_to_dict = df.to_dict('index') - - # loop over each root urls in the dataset - for i, (_, url) in enumerate(tqdm(root_urls.items(), total=len(root_urls))): - edge_index, x, _, y, viz_utils = self._build_tensors(url, df_to_dict, df.index) - - self.data = Data(x=x, edge_index=edge_index, y=y) - torch.save(self.data, os.path.join(self.processed_dir, f'data_{i}.pt')) - - # save another file with variables needed for visualization - self.data.pos = viz_utils - torch.save(self.data, os.path.join(self.processed_dir, f'data_viz_{i}.pt')) - - def len(self): - return (len(os.listdir(self.processed_dir)) - 2) - - def _build_tensors(self, root_url: str, df_to_dict, existing_urls) -> Tuple[Tensor, Tensor, Tensor, Tensor, dict]: - """Builds the required tensors for one graph. - These matrices will be then used for training the GNN. - - Args: - df: the dataset of one graph as form of pandas daframe - - Returns: - Tuple[edge_index, x, edge_attr, y, viz_utils] - """ - from_, to_, edges_ = [], [], [] - id_to_feat = {} - url_to_id = {} - queue = [root_url] - visited = set() - error_pages = set() - - def map_url_to_id(url: str): - url_to_id[url] = len(url_to_id) \ - if url not in url_to_id else url_to_id[url] - - def bool_to_float(value: bool): - return 1. if value else 0. 
- - while True: - if len(queue) == 0: - break - url = queue.pop() - try: - node = df_to_dict[url] - except KeyError: - node = self.error_page_node_feature - - refs = node['refs'] - map_url_to_id(url) - - for i, edge in enumerate(refs): - ref = edge['url'] - is_same_domain = bool_to_float(edge['is_same_domain']) - is_form = bool_to_float(edge['is_form']) - is_anchor = bool_to_float(edge['is_anchor']) - - if (url, ref, i) in visited: - break - if ref not in existing_urls: - error_pages.add(ref) - map_url_to_id(ref) - - from_.append(url_to_id[url]) - to_.append(url_to_id[ref]) - edges_.append([1]) # should be edge features - - is_anchor = ref == url - if not is_anchor: - queue.append(ref) - visited.add((url, ref, i)) - - # remove url and refs - features = [node['is_phishing_pred']] - id_to_feat[url_to_id[url]] = features - - x = [id_to_feat[k] for k in sorted(id_to_feat)] - visualization = { - 'url_to_id': url_to_id, - 'error_pages': error_pages, - } - - return ( - torch.tensor([from_, to_], dtype=torch.int64), - torch.tensor(x, dtype=torch.float32), - torch.tensor(edges_, dtype=torch.int64), - torch.tensor(df_to_dict[root_url]['is_phishing'], dtype=torch.int64), - visualization, - ) - - def get(self, idx): - t = torch.load(os.path.join(self.processed_dir, self.file_name(idx))) - t.x = t.x.to(dtype=torch.float32) - t.y = t.y.to(dtype=torch.int64) - t.edge_index = t.edge_index.to(dtype=torch.int64) - return t - - @property - def error_page_node_feature(self): - data = { - 'is_phishing': self.nan_value, - 'is_phishing_pred': self.nan_value, - 'refs': [], - } - return pd.Series(data=data) diff --git a/phishGNN/other_models.py b/phishGNN/other_models.py index 2be5344..3397f97 100644 --- a/phishGNN/other_models.py +++ b/phishGNN/other_models.py @@ -15,7 +15,7 @@ # from models.ffn import FeedforwardNeuralNetModel -from models import FeedforwardNeuralNetModel +from .models import FeedforwardNeuralNetModel def warn(*args, **kwargs): diff --git a/phishGNN/predict.py b/phishGNN/predict.py index 7cfb9e7..a9372d0 100644 --- a/phishGNN/predict.py +++ b/phishGNN/predict.py @@ -4,22 +4,21 @@ import torch -# from dataset_v1 import PhishingDataset -from dataset_v3 import PhishingDataset3 +from dataset_v1 import PhishingDataset from utils.compute_device import COMPUTE_DEVICE def predict(url: str, weights_file: str) -> int: path = os.path.join(os.getcwd(), 'data', 'predict') - data_files = sorted(glob.glob(os.path.join(path, 'raw', '*.csv'))) + data_files = sorted(glob.glob(os.path.join(path, 'processed', '*'))) if not os.path.exists(path) or len(data_files) == 0: raise FileNotFoundError(f'No files found in path {path}, please the crawler before.') - dataset = PhishingDataset3(root=path, do_data_preparation=True) + dataset = PhishingDataset(root=path, do_data_preparation=True) data = dataset[0] data = data.to(COMPUTE_DEVICE) - model = torch.load(os.path.join(os.getcwd(), 'weights/', weights_file), map_location=COMPUTE_DEVICE).to(COMPUTE_DEVICE) + model = torch.load(os.path.join(os.getcwd(), 'weights/', weights_file)).to(COMPUTE_DEVICE) model.eval() out = model(data.x, data.edge_index, data.batch) pred = out.argmax(dim=1) @@ -28,11 +27,12 @@ def predict(url: str, weights_file: str) -> int: if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--url', type=str, help='the url to predict (phishing/benign)', default="http://www.amazon.fr") - parser.add_argument('--pkl_file', type=str, default='10_epochs_default/GCN_3_global_mean_pool_32.pkl', help='the path to the model weights 
(.pkl)') + parser.add_argument('url', type=str, help='the url to predict (phishing/benign)') + parser.add_argument('pkl_file', type=str, default='GCN_3_global_mean_pool_32.pkl', + help='the path to the model weights (.pkl)') args, _ = parser.parse_known_args() - pred = predict(args.url, args.pkl_file) + pred = predict(args.url, args.weights_file) if pred == 1: print('Phishing') diff --git a/phishGNN/utils/utils.py b/phishGNN/utils/utils.py index 2c08ecb..83943d9 100644 --- a/phishGNN/utils/utils.py +++ b/phishGNN/utils/utils.py @@ -1,4 +1,4 @@ -from typing import List, Tuple +from typing import List from urllib.parse import urlparse import numpy as np @@ -25,7 +25,7 @@ def log_fail(msg: str): print(f'{bcolors.FAIL}FAILURE:\t{bcolors.ENDC}{msg}') -def tensor_to_tuple_list(tensor: torch.Tensor) -> List[Tuple[int, int]]: +def tensor_to_tuple_list(tensor: torch.Tensor) -> list[tuple[int, int]]: """Converts a tensor of shape [[x], [y]] in an array of tuples of shape [(x, y)]. """