diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..738d9e0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +# Backup files +*.swp diff --git a/createproject.sh b/createproject.sh index 856a316..78e28d2 100644 --- a/createproject.sh +++ b/createproject.sh @@ -1,94 +1,152 @@ -# This is meant to create a Data Science project following the -# format here: +# ------------------------------------------------------------------- +# CREATEPROJECT +# ------------------------------------------------------------------- +# This Bash script is meant to create a Data Science project +# following the format here: # https://github.com/drivendata/cookiecutter-data-science # with some minor tweaks and with some automation. - +# # Some updates in the drivendata repo are expected soon, which will # require an update from this (Apr 12, 2019). - +# # ------------------------------------------------------------------- -# REQUIREMENTS +# FEATURES # ------------------------------------------------------------------- -# conda/miniconda -# conda-forge channel added. (conda config --add channels conda-forge) +# Project name, conda environment given same name +# +# Package name is the project name but only a-z characters +# +# Creates a fresh conda environment with packages of your choice # +# Installs additional packages of your choice from pip +# +# If you install kaggle from pip, you can specify a dataset to +# download and unzip into the project/data/raw folder. +# +# Option to create a local git repo +# +# Option to create a GitHub repo and push your local repo. +# +# Option to push your local git repo to an existing GitHub repo. +# +# ------------------------------------------------------------------- +# REQUIREMENTS +# ------------------------------------------------------------------- +# I only use this on ubuntu. Adapt as you wish for other distros. +# +# conda/miniconda. +# conda-forge channel. (conda config --add channels conda-forge) +# cookiecutter. (install via conda or via pip - see link at top). +# unzip. (sudo apt-get install unzip) +# # ------------------------------------------------------------------- # HOW TO USE # ------------------------------------------------------------------- # Go to a parent folder where your repo folder will go in. Then run # ./path/to/createproject.sh +# Then follow the prompts. # ------------------------------------------------------------------- +# Cookiecutter download here. +cookiecutter https://github.com/drivendata/cookiecutter-data-science + +# Find the latest created folder, that is the project name. +environment_name=$(ls -t | head -1) + # First we create a conda environment. echo "First we create a conda environment for our new project." -read -p "Enter the conda env name (recommended same name as repo) : " environment_name conda create -n $environment_name -# remove is conda remove --name myenv --all conda activate $environment_name -# Check if this works without adding the conda-forge channel to the base env -echo "Then we install cookiectter." -conda install cookiecutter - # Packages to install from conda echo "Let's install some conda packages." read -p "List the packages you want to install from conda : " condapkg_install if [[ ! -z "$condapkg_install" ]]; then conda install $condapkg_install +else + conda install numpy pandas matplotlib # The holy trinity. fi -# echo "Now for pip packages. Only pick packages you can't get from conda." -# echo "I recommend you install dotenv. It keps you manage local environment variables and API keys." -# echo "I also recommend you install kaggle if you want to download a dataset from there." -# read -p "List the packages you want to install from pip : " pippkg_install -# pip install $pippkg_install +echo "Now for pip packages. Only pick packages you can't get from conda." +echo "I recommend you install dotenv. It keps you manage local environment variables and API keys." +echo "I also recommend you install kaggle if you want to download a dataset from there." +read -p "List the packages you want to install from pip : " pippkg_install -# echo "Please name your repo/project the same as the conda env name or else this script will break." -# cookiecutter https://github.com/drivendata/cookiecutter-data-science +if [[ ! -z "pippkg_install" ]]; then + pip install $pippkg_install +fi -# cd $environment_name +cd $environment_name # Download data from kaggle -# cd data/raw -# if [[ $pippkg_install == *"kaggle"* ]]; then -# echo "You have opted to install kaggle. If you want to download data, make sure" -# echo "you have set up your API key as described in https://github.com/kaggle/kaggle-api" -# read -p "Do you want to download a dataset? [y/n] : " kag_dwn -# if [[ $kag_dwn == "y" ]]; then -# echo "Enter the link to the dataset. For example, if " -# echo "the link to the dataset is kaggle.com/mlg-ulb/creditcardfraud " -# echo "then enter mlg-ulb/creditcardfraud" -# read -p "Enter link : " kag_das -# kaggle datasets download -d $kag_das -# fi -# fi -# cd ../.. - -# Replace the data folder with nothing. -# sed -i -e "s_/data/_#Nothing" .gitignore +cd data/raw +if [[ $pippkg_install == *"kaggle"* ]]; then + echo "You have opted to install kaggle. If you want to download data, make sure" + echo "you have set up your API key as described in https://github.com/kaggle/kaggle-api" + read -p "Do you want to download a dataset? [y/n] : " kag_dwn + if [[ $kag_dwn == "y" ]]; then + echo "Enter the link to the dataset. For example, if " + echo "the link to the dataset is kaggle.com/mlg-ulb/creditcardfraud " + echo "then enter mlg-ulb/creditcardfraud" + read -p "Enter link : " kag_das + kaggle datasets download -d $kag_das + newfile_name=$(ls -t | head -n1) + echo "Can only handle zip files right now. Can change later." + unzip $newfile_name + fi +fi +cd ../.. + +# Replace the data folder with nothing in the .gitignore file. +sed -i -e "s_/data/_#Nothing_g" .gitignore # Tweak .gitignore file. -# echo "# Include data sub-folders" >> .gitignore -# echo "data/raw/*" >> .gitignore -# echo "data/interim/*" >> .gitignore -# echo "data/processed/*" >> .gitignore -# echo "data/external/*" >> .gitignore -# echo "!.gitkeep" >> .gitignore +echo "# Include data sub-folders" >> .gitignore +echo "data/raw/*" >> .gitignore +echo "data/interim/*" >> .gitignore +echo "data/processed/*" >> .gitignore +echo "data/external/*" >> .gitignore +echo "!.gitkeep" >> .gitignore # Replace .env with *.env -# sed -i -e 's/.env/*.env' .gitignore +sed -i -e 's/.env/*.env/g' .gitignore # Rename package from src to something else. -# echo "Your package will be given the same name as your conda env/repo" -# echo "Except any non-letters will be taken out." -# ppkg_name=$(echo $environment_name | sed 's/[^a-zA-Z]*//g') -# mv src $ppkg_name -# sed -i -e "s/src/$ppkg_name/g" setup.py +echo "Your package will be given the same name as your conda env/repo" +echo "Except any non-letters will be taken out." +ppkg_name=$(echo $environment_name | sed 's/[^a-zA-Z]*//g') +mv src $ppkg_name +sed -i -e "s/src/$ppkg_name/g" setup.py # Pip install package. -# pip install -e . +pip install -e . # Initialize and push git repo. +read -p "Do you want to create a git repo? [y/n] : " create_git + +if [[ $create_git == "y" ]]; then + git init + git add . + git commit -m "First commit." + read -p "Do you want to create a new github repo? [y/n] : " create_github + if [[ $create_github == "y" ]]; then + read -p "Provide your GitHub username. " github_username + curl -u $github_username https://api.github.com/user/repos -d '{"name":"'"$environment_name"'"}' + git remote add origin https://github.com/$github_username/$environment_name.git + git push origin master + else + read -p "Do you want to push this to an existing github repo? [y/n] : " add_github + if [[ $add_github == "y" ]]; then + read -p "Provide the URL to your repository. " github_remote_url + git remote add origin $github_remote_url + git push origin master + fi + fi +fi + +echo "" +echo "Okay we're all set!" +echo "" # That's it for now!