-
Notifications
You must be signed in to change notification settings - Fork 274
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
257 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,257 @@ | ||
{ | ||
"nbformat": 4, | ||
"nbformat_minor": 0, | ||
"metadata": { | ||
"colab": { | ||
"provenance": [] | ||
}, | ||
"kernelspec": { | ||
"name": "python3", | ||
"display_name": "Python 3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
}, | ||
"accelerator": "GPU", | ||
"gpuClass": "standard" | ||
}, | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "pJ7gwY3ZJQmi", | ||
"outputId": "e6286bd6-bf8b-461b-ede6-57cf0dce0ee1" | ||
}, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | ||
"Requirement already satisfied: transformers in /usr/local/lib/python3.9/dist-packages (4.28.1)\n", | ||
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (1.22.4)\n", | ||
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.9/dist-packages (from transformers) (4.65.0)\n", | ||
"Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.9/dist-packages (from transformers) (0.13.4)\n", | ||
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from transformers) (6.0)\n", | ||
"Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from transformers) (2.27.1)\n", | ||
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (2022.10.31)\n", | ||
"Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers) (3.11.0)\n", | ||
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.9/dist-packages (from transformers) (0.13.3)\n", | ||
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from transformers) (23.0)\n", | ||
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers) (4.5.0)\n", | ||
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (1.26.15)\n", | ||
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (2022.12.7)\n", | ||
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (2.0.12)\n", | ||
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (3.4)\n", | ||
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | ||
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.9/dist-packages (0.1.98)\n", | ||
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | ||
"Requirement already satisfied: sacremoses in /usr/local/lib/python3.9/dist-packages (0.0.53)\n", | ||
"Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from sacremoses) (4.65.0)\n", | ||
"Requirement already satisfied: click in /usr/local/lib/python3.9/dist-packages (from sacremoses) (8.1.3)\n", | ||
"Requirement already satisfied: six in /usr/local/lib/python3.9/dist-packages (from sacremoses) (1.16.0)\n", | ||
"Requirement already satisfied: regex in /usr/local/lib/python3.9/dist-packages (from sacremoses) (2022.10.31)\n", | ||
"Requirement already satisfied: joblib in /usr/local/lib/python3.9/dist-packages (from sacremoses) (1.2.0)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%pip install transformers\n", | ||
"%pip install sentencepiece\n", | ||
"%pip install sacremoses" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"import torch\n", | ||
"torch.cuda.is_available()" | ||
], | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "-JA4al2aZlkR", | ||
"outputId": "d4a56397-632e-4e71-e673-6619b4b2ae31" | ||
}, | ||
"execution_count": 2, | ||
"outputs": [ | ||
{ | ||
"output_type": "execute_result", | ||
"data": { | ||
"text/plain": [ | ||
"True" | ||
] | ||
}, | ||
"metadata": {}, | ||
"execution_count": 2 | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"from transformers import pipeline\n", | ||
"\n", | ||
"classifier = pipeline(task=\"sentiment-analysis\", device=0)\n", | ||
"preds = classifier(\"I am really happy today!\")\n", | ||
"print(preds)" | ||
], | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "P3SHzpsyJUXv", | ||
"outputId": "16b82ba0-d0e3-4787-8314-23773c1c071c" | ||
}, | ||
"execution_count": 4, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stderr", | ||
"text": [ | ||
"No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).\n", | ||
"Using a pipeline without specifying a model name and revision in production is not recommended.\n" | ||
] | ||
}, | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"[{'label': 'POSITIVE', 'score': 0.9998762607574463}]\n" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"classifier = pipeline(task=\"sentiment-analysis\", model=\"uer/roberta-base-finetuned-jd-binary-chinese\", device=0)\n", | ||
"preds = classifier(\"这个餐馆太难吃了。\")\n", | ||
"print(preds)" | ||
], | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "k1uX2WvxJ55Z", | ||
"outputId": "37cfa3e9-d632-4837-f0a0-6661b9c02ab5" | ||
}, | ||
"execution_count": 5, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"[{'label': 'negative (stars 1, 2 and 3)', 'score': 0.934112012386322}]\n" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"translation = pipeline(task=\"translation_en_to_zh\", model=\"Helsinki-NLP/opus-mt-en-zh\", device=0)\n", | ||
"\n", | ||
"text = \"I like to learn data science and AI.\"\n", | ||
"translated_text = translation(text)\n", | ||
"print(translated_text)" | ||
], | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "Y_SLr9NRT-uh", | ||
"outputId": "55bb674d-9767-4f05-e50d-6bad704423e7" | ||
}, | ||
"execution_count": 13, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"[{'translation_text': '我喜欢学习数据科学和人工智能'}]\n" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"from transformers import pipeline\n", | ||
"\n", | ||
"transcriber = pipeline(model=\"openai/whisper-medium\", device=0)\n", | ||
"result = transcriber(\"./data/podcast_clip.mp3\")\n", | ||
"print(result)" | ||
], | ||
"metadata": { | ||
"id": "eNPAR9nQL4qz", | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"outputId": "d7da5cd3-9830-415d-a583-adfaab404420" | ||
}, | ||
"execution_count": 6, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stderr", | ||
"text": [ | ||
"/usr/local/lib/python3.9/dist-packages/transformers/generation/utils.py:1313: UserWarning: Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n", | ||
" warnings.warn(\n" | ||
] | ||
}, | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"{'text': \" Welcome to OnBoard, a real first-line experience, a new investment thinking. I'm Monica. I'm Gao Ning. Let's talk about how software changes the world.\"}\n" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"from transformers import pipeline\n", | ||
"from transformers import WhisperProcessor, WhisperForConditionalGeneration\n", | ||
"processor = WhisperProcessor.from_pretrained(\"openai/whisper-medium\")\n", | ||
"forced_decoder_ids = processor.get_decoder_prompt_ids(language=\"zh\", task=\"transcribe\")\n", | ||
"\n", | ||
"transcriber = pipeline(model=\"openai/whisper-medium\", device=0,\n", | ||
" generate_kwargs={\"forced_decoder_ids\": forced_decoder_ids})\n", | ||
"result = transcriber(\"./data/podcast_clip.mp3\")\n", | ||
"print(result)" | ||
], | ||
"metadata": { | ||
"id": "dTd4eTBwNLn5", | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"outputId": "66b7005c-bcc5-434d-d8c9-4dd28b104716" | ||
}, | ||
"execution_count": 7, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stderr", | ||
"text": [ | ||
"/usr/local/lib/python3.9/dist-packages/transformers/generation/utils.py:1313: UserWarning: Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n", | ||
" warnings.warn(\n" | ||
] | ||
}, | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"{'text': '欢迎来到Onboard真实的一线经验走新的投资思考我是Monica我是高宁我们一起聊聊软件如何改变世界'}\n" | ||
] | ||
} | ||
] | ||
} | ||
] | ||
} |