Skip to content

Commit

Permalink
add huggingface pipeline example
Browse files Browse the repository at this point in the history
  • Loading branch information
xuwenhao committed Apr 18, 2023
1 parent b4107d5 commit 5019bc1
Showing 1 changed file with 257 additions and 0 deletions.
257 changes: 257 additions & 0 deletions 22_huggingface_pipeline_example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "standard"
},
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pJ7gwY3ZJQmi",
"outputId": "e6286bd6-bf8b-461b-ede6-57cf0dce0ee1"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.9/dist-packages (4.28.1)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (1.22.4)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.9/dist-packages (from transformers) (4.65.0)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.9/dist-packages (from transformers) (0.13.4)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from transformers) (2.27.1)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (2022.10.31)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers) (3.11.0)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.9/dist-packages (from transformers) (0.13.3)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from transformers) (23.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers) (4.5.0)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (1.26.15)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (2022.12.7)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (2.0.12)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->transformers) (3.4)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.9/dist-packages (0.1.98)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: sacremoses in /usr/local/lib/python3.9/dist-packages (0.0.53)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from sacremoses) (4.65.0)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.9/dist-packages (from sacremoses) (8.1.3)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.9/dist-packages (from sacremoses) (1.16.0)\n",
"Requirement already satisfied: regex in /usr/local/lib/python3.9/dist-packages (from sacremoses) (2022.10.31)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.9/dist-packages (from sacremoses) (1.2.0)\n"
]
}
],
"source": [
"# Install transformers together with sentencepiece and sacremoses in one pip call.\n",
"%pip install transformers sentencepiece sacremoses"
]
},
{
"cell_type": "code",
"source": [
"import torch\n",
"\n",
"# Every pipeline in this notebook is created with device=0, so confirm that\n",
"# a CUDA device is visible before going any further.\n",
"torch.cuda.is_available()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-JA4al2aZlkR",
"outputId": "d4a56397-632e-4e71-e673-6619b4b2ae31"
},
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
"metadata": {},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"source": [
"from transformers import pipeline\n",
"\n",
"# Name the checkpoint and revision explicitly instead of relying on the task\n",
"# default. These are the values the library reported falling back to when no\n",
"# model was supplied, so the prediction is unchanged but no longer depends on\n",
"# whatever the default happens to be at run time.\n",
"classifier = pipeline(\n",
"    task=\"sentiment-analysis\",\n",
"    model=\"distilbert-base-uncased-finetuned-sst-2-english\",\n",
"    revision=\"af0f99b\",\n",
"    device=0,\n",
")\n",
"preds = classifier(\"I am really happy today!\")\n",
"print(preds)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "P3SHzpsyJUXv",
"outputId": "16b82ba0-d0e3-4787-8314-23773c1c071c"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).\n",
"Using a pipeline without specifying a model name and revision in production is not recommended.\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"[{'label': 'POSITIVE', 'score': 0.9998762607574463}]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Same sentiment-analysis task, this time with an explicitly chosen checkpoint\n",
"# and a Chinese input sentence. Relies on pipeline imported in an earlier cell.\n",
"classifier = pipeline(\n",
"    task=\"sentiment-analysis\",\n",
"    model=\"uer/roberta-base-finetuned-jd-binary-chinese\",\n",
"    device=0,\n",
")\n",
"preds = classifier(\"这个餐馆太难吃了。\")\n",
"print(preds)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "k1uX2WvxJ55Z",
"outputId": "37cfa3e9-d632-4837-f0a0-6661b9c02ab5"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[{'label': 'negative (stars 1, 2 and 3)', 'score': 0.934112012386322}]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Build an English-to-Chinese translation pipeline on GPU 0 and run it on a\n",
"# single sentence. Relies on pipeline imported in an earlier cell.\n",
"translation = pipeline(\n",
"    task=\"translation_en_to_zh\",\n",
"    model=\"Helsinki-NLP/opus-mt-en-zh\",\n",
"    device=0,\n",
")\n",
"\n",
"text = \"I like to learn data science and AI.\"\n",
"translated_text = translation(text)\n",
"print(translated_text)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Y_SLr9NRT-uh",
"outputId": "55bb674d-9767-4f05-e50d-6bad704423e7"
},
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[{'translation_text': '我喜欢学习数据科学和人工智能'}]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from transformers import pipeline\n",
"\n",
"# No task, language or decoding options are passed here: the pipeline is built\n",
"# from the checkpoint name alone and applied to the local audio clip.\n",
"transcriber = pipeline(\n",
"    model=\"openai/whisper-medium\",\n",
"    device=0,\n",
")\n",
"result = transcriber(\"./data/podcast_clip.mp3\")\n",
"print(result)"
],
"metadata": {
"id": "eNPAR9nQL4qz",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "d7da5cd3-9830-415d-a583-adfaab404420"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/generation/utils.py:1313: UserWarning: Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n",
" warnings.warn(\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"{'text': \" Welcome to OnBoard, a real first-line experience, a new investment thinking. I'm Monica. I'm Gao Ning. Let's talk about how software changes the world.\"}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from transformers import WhisperProcessor, pipeline\n",
"\n",
"# Ask the processor for decoder prompt ids with language=zh and\n",
"# task=transcribe, then hand them to generation via generate_kwargs.\n",
"processor = WhisperProcessor.from_pretrained(\"openai/whisper-medium\")\n",
"forced_decoder_ids = processor.get_decoder_prompt_ids(language=\"zh\", task=\"transcribe\")\n",
"\n",
"transcriber = pipeline(\n",
"    model=\"openai/whisper-medium\",\n",
"    device=0,\n",
"    generate_kwargs={\"forced_decoder_ids\": forced_decoder_ids},\n",
")\n",
"result = transcriber(\"./data/podcast_clip.mp3\")\n",
"print(result)"
],
"metadata": {
"id": "dTd4eTBwNLn5",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "66b7005c-bcc5-434d-d8c9-4dd28b104716"
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.9/dist-packages/transformers/generation/utils.py:1313: UserWarning: Using `max_length`'s default (448) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n",
" warnings.warn(\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"{'text': '欢迎来到Onboard真实的一线经验走新的投资思考我是Monica我是高宁我们一起聊聊软件如何改变世界'}\n"
]
}
]
}
]
}

0 comments on commit 5019bc1

Please sign in to comment.