Rename voice to speech recognition

ruscias · Jun 8, 2022 · 1fdecb6 · 1fdecb6
1 parent fdb44d1
commit 1fdecb6
Show file tree

Hide file tree

Showing 7 changed files with 50 additions and 75 deletions.
diff --git a/.gitignore b/.gitignore
@@ -27,6 +27,7 @@ episodes/
 *.wav
 podcast_summary2.py
 temp.mp3
+voice2.ipynb
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

diff --git a/voice/README.md → speech_recognition/README.md b/voice/README.md → speech_recognition/README.md
@@ -1,19 +1,21 @@
 # Project Overview
 
-In this project, we'll build a system that can automatically take notes for us by turning speech into text and generating a summary.  We'll also include a way to hook up a microphone to automatically record and transcribe audio for live notetaking.
+In this project, we'll build a system that can automatically recognize speech and summarize it.  This can be used for automatically transcribing and summarizing lecture recordings, podcasts, or videos.
 
-By the end of this project, you'll have a voice recognition project that you can continue to build on.
+We'll also include a way to hook up a microphone to automatically record and transcribe audio for live notetaking.  This could be used to record and transcribe meetings in real time.
+
+By the end of this project, you'll have a speech-to-text project that you can continue to build on.
 
 **Project Steps**
 
-* Create a voice recognition system using vosk
-* Add punctuation to the text transcript us recasepunc
-* Summarize the text using huggingface transformers
+* Create a speech recognition system using vosk
+* Add punctuation to the text transcript using recasepunc
+* Summarize the text using a huggingface summarization pipeline
 * Create a widget to record and transcribe live audio
 
 ## Code
 
-You can find the code for this project [here](https://github.com/dataquestio/project-walkthroughs/tree/master/voice).
+You can find the code for this project [here](https://github.com/dataquestio/project-walkthroughs/tree/master/speech_recognition).
 
 File overview:
 
@@ -73,5 +75,5 @@ Pyaudio can be a little tricky to install, since it depends on system packages.
 
 You'll want to download a couple of audio files to test the transcription with:
 
-* [marketplace_full.mp3](https://github.com/dataquestio/project-walkthroughs/raw/master/voice/marketplace_full.mp3)
-* [marketplace.mp3](https://github.com/dataquestio/project-walkthroughs/raw/master/voice/marketplace.mp3)
+* [marketplace_full.mp3](https://github.com/dataquestio/project-walkthroughs/raw/master/speech_recognition/marketplace_full.mp3)
+* [marketplace.mp3](https://github.com/dataquestio/project-walkthroughs/raw/master/speech_recognition/marketplace.mp3)
diff --git a/voice/marketplace.mp3 → speech_recognition/marketplace.mp3 b/voice/marketplace.mp3 → speech_recognition/marketplace.mp3
diff --git a/voice/marketplace_full.mp3 → speech_recognition/marketplace_full.mp3 b/voice/marketplace_full.mp3 → speech_recognition/marketplace_full.mp3
diff --git a/voice/transcript.txt → speech_recognition/transcript.txt b/voice/transcript.txt → speech_recognition/transcript.txt
diff --git a/voice/voice.ipynb → speech_recognition/voice.ipynb b/voice/voice.ipynb → speech_recognition/voice.ipynb
@@ -307,76 +307,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 5,
    "id": "957d3aef-9c37-421f-acfc-21111d5158df",
    "metadata": {},
    "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "bc20964742bb4ed4b757a89df50f1179",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading:   0%|          | 0.00/1.17k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "190a20a8476a4f5fa0f1a843f409c75b",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading:   0%|          | 0.00/231M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ccebb62a1a574511b54183ad725774d9",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading:   0%|          | 0.00/773k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "879b70e705574f059fcde50a3e881bf7",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading:   0%|          | 0.00/1.32M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/Users/vik/.virtualenvs/voice/lib/python3.9/site-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
-      "For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
-      "- Be aware that you SHOULD NOT rely on t5-small automatically truncating your input to 512 when padding/encoding.\n",
-      "- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
-      "- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
-      "  warnings.warn(\n"
+      "No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 (https://huggingface.co/sshleifer/distilbart-cnn-12-6)\n"
      ]
     }
    ],
@@ -388,7 +327,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 68,
+   "execution_count": 6,
    "id": "18e3b982-da04-4f17-9b49-28bdcf8c2151",
    "metadata": {},
    "outputs": [],
@@ -402,7 +341,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 70,
+   "execution_count": 7,
    "id": "2f75311e-95fb-43eb-a068-9c17e99f1f01",
    "metadata": {},
    "outputs": [],
@@ -412,7 +351,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 71,
+   "execution_count": 8,
    "id": "3d61cc49-72e4-44e6-9fa2-91eb169ba0d0",
    "metadata": {},
    "outputs": [],
@@ -422,7 +361,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 72,
+   "execution_count": 9,
    "id": "ff0d8cd7-000f-419f-98ee-b57b747b01db",
    "metadata": {},
    "outputs": [

diff --git a/speech_recognition/voice2.ipynb b/speech_recognition/voice2.ipynb
@@ -0,0 +1,33 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "05d36428-1a47-4a5b-9641-45917833a7b0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}