Commit

update
Sasha Rush committed Apr 13, 2023
1 parent 94c77ce commit 7bf9a82
Showing 9 changed files with 375 additions and 67 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -34,11 +34,11 @@ repos:
      - id: isort
        exclude: ^(docs/)|(project/)|(assignments/)|(project/interface/)|(examples/)

  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v0.971
    hooks:
      - id: mypy
        exclude: ^(docs/)|(project/)|(assignments/)|(project/interface/)|(examples/)
#  - repo: https://github.com/pre-commit/mirrors-mypy
#    rev: v0.971
#    hooks:
#      - id: mypy
#        exclude: ^(docs/)|(project/)|(assignments/)|(project/interface/)|(examples/)


# Black, the code formatter, natively supports pre-commit
1 change: 1 addition & 0 deletions examples/data.json

Large diffs are not rendered by default.

52 changes: 52 additions & 0 deletions examples/gradio_example.py
@@ -0,0 +1,52 @@
# + tags=["hide_inp"]

desc = """
### Gradio Tool
Chain that asks for a one-sentence scene description, renders it as an image with the Stable Diffusion Gradio tool, and then captions the generated image. [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/srush/MiniChain/blob/master/examples/bash.ipynb)
"""
# -

# $

from minichain import show, prompt, OpenAI, OpenAIStream
import gradio as gr
from gradio_tools.tools import StableDiffusionTool, ImageCaptioningTool

@prompt(OpenAIStream(), stream=True)
def picture(model, query):
    # Stream the model's scene description, re-yielding the accumulated text
    # so the interface can redraw it as it grows.
    out = ""
    for r in model.stream(query):
        out += r
        yield out

@prompt(StableDiffusionTool(), stream=True, block_input=lambda: gr.Textbox(label=""))
def gen(model, query):
    # Show a placeholder image while the Stable Diffusion tool runs,
    # then yield the generated image.
    for r in model.stream(query):
        yield "https://htmlcolorcodes.com/assets/images/colors/baby-blue-color-solid-background-1920x1080.png"
    yield r

@prompt(ImageCaptioningTool(), block_output=lambda: gr.Textbox(label=""))
def caption(model, img_src):
    # Caption the generated image.
    return model(img_src)

def gradio_example(query):
    # Chain the three prompts: description -> image -> caption.
    return caption(gen(picture(query)))


# $

gradio = show(gradio_example,
              subprompts=[picture, gen, caption],
              examples=['Describe a one-sentence fantasy scene.',
                        'Describe a one-sentence scene happening on the moon.'],
              out_type="markdown",
              description=desc,
              css="#advanced {display: none}"
              )

if __name__ == "__main__":
    gradio.queue().launch()
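
The picture prompt above uses a common streaming pattern: accumulate tokens and re-yield the growing string so the UI can show partial output. A minimal self-contained sketch of that pattern, with a plain generator standing in for the model (the fake token source is an assumption for illustration, not part of minichain):

def fake_stream():
    # Stand-in for model.stream(query): yields tokens one at a time.
    for token in ["A ", "quiet ", "village ", "under ", "two ", "moons."]:
        yield token

def accumulate(stream):
    out = ""
    for r in stream:
        out += r
        yield out  # each yield is the full text so far

for partial in accumulate(fake_stream()):
    print(partial)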

31 changes: 31 additions & 0 deletions examples/table.pmpt.txt
@@ -0,0 +1,31 @@
You are a utility built to extract structured information from documents. You are returning a TSV table. Here are the headers:

----
{{type}} {% for k in player_keys %}{{k[0]}}{{"\t" if not loop.last}}{% endfor %}
----

Return the rest of the table in TSV format. Here are some examples:

{% for example in examples %}
Example
---
{{example.input}}
---

Output
---
{{example.output}}
---
{% endfor %}

Article:
----
{{passage}}
----

All other values should be numbers or _.
Only include numbers that appear explicitly in the passage above.
If you cannot find a value in the passage, output _. Most cells will be _.

OK, here is the correct, valid TSV with headers and nothing else. Remember to only include values that are directly written in the article. Do not guess or combine rows.
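
For reference, this prompt is a Jinja2 template (table.py below renders it via template_file="table.pmpt.txt"). A minimal sketch of rendering it directly with Jinja2, using illustrative values for the template variables (the concrete data shown here is assumed, not taken from data.json):

from jinja2 import Template

# Render the extraction prompt with toy inputs. player_keys is a sequence of
# (stat name, column abbreviation) pairs, matching names.items() in table.py.
template = Template(open("table.pmpt.txt").read())
print(template.render(
    type="Player",
    player_keys=[("Points", "PTS"), ("Assists", "AST")],
    examples=[{"input": "Example game summary ...",
               "output": "Player\tPoints\tAssists\nLeBron James\t30\t8\n"}],
    passage="LeBron James scored 30 points and added 8 assists in the win.",
))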

81 changes: 81 additions & 0 deletions examples/table.py
@@ -0,0 +1,81 @@
# + tags=["hide_inp"]
desc = """
### Table
Extract structured player statistics from Rotowire NBA game summaries into a TSV table, rendered as it streams.
"""
# -

# $
import pandas as pd
from minichain import prompt, Mock, show, OpenAIStream
import minichain
import json
import gradio as gr

rotowire = json.load(open("data.json"))
names = {
'3-pointer percentage': 'FG3_PCT',
'3-pointers attempted': 'FG3A',
'3-pointers made': 'FG3M',
'Assists': 'AST',
'Blocks': 'BLK',
'Field goal percentage': 'FG_PCT',
'Field goals attempted': 'FGA',
'Field goals made': 'FGM',
'Free throw percentage': 'FT_PCT',
'Free throws attempted': 'FTA',
'Free throws made': 'FTM',
'Minutes played': 'MIN',
'Personal fouls': 'PF',
'Points': 'PTS',
'Rebounds': 'REB',
'Rebounds (Defensive)': 'DREB',
'Rebounds (Offensive)': 'OREB',
'Steals': 'STL',
'Turnovers': 'TO'
}
# Convert an example's stat dictionary ({stat name: {player: value}}) to a dataframe.
def to_df(d):
    players = {player for v in d.values() if v is not None for player, _ in v.items()}
    lookup = {k: {a: b for a, b in v.items()} for k, v in d.items()}
    rows = [{"player": p} | {k: "_" if p not in lookup.get(k, []) else lookup[k][p] for k in names.keys()}
            for p in players]
    return pd.DataFrame.from_dict(rows).astype("str").sort_values(axis=0, by="player", ignore_index=True).transpose()


# Make few-shot examples
few_shot_examples = 2
examples = []
for i in range(few_shot_examples):
    examples.append({"input": rotowire[i][1],
                     "output": to_df(rotowire[i][0][1]).transpose().set_index("player").to_csv(sep="\t")})

@prompt(OpenAIStream(),
        template_file="table.pmpt.txt",
        block_output=gr.HTML,
        stream=True)
def extract(model, passage, typ):
    # Stream the TSV output token by token, rendering it as a growing HTML table.
    state = []
    out = ""
    for token in model.stream(dict(player_keys=names.items(), examples=examples, passage=passage, type=typ)):
        out += token
        html = "<table><tr><td>" + out.replace("\t", "</td><td>").replace("\n", "</td></tr><tr><td>") + "</td></tr></table>"
        yield html
    yield html



def run(query):
    return extract(query, "Player")

# $

gradio = show(run,
              examples=[rotowire[i][1] for i in range(50, 55)],
              subprompts=[extract],
              code=open("table.py", "r").read().split("$")[1].strip().strip("#").strip(),
              out_type="markdown"
              )

if __name__ == "__main__":
    gradio.queue().launch()
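
For reference, a hypothetical call to to_df (the toy input below is illustrative, not taken from data.json) shows the shape it expects and produces: a dict mapping stat name to {player: value} becomes one row per player, with "_" for missing stats, transposed so stats run down the rows.

# Hypothetical toy example (assumed data, not from the dataset):
toy = {
    "Points": {"LeBron James": 30, "Anthony Davis": 22},
    "Assists": {"LeBron James": 8},
}
print(to_df(toy))
# Columns come from the full names mapping; any stat absent for a player is "_".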
1 change: 1 addition & 0 deletions minichain/__init__.py
@@ -8,6 +8,7 @@
    Mock,
    OpenAI,
    OpenAIEmbed,
    OpenAIStream,
    Python,
    Request,
    set_minichain_log,
49 changes: 47 additions & 2 deletions minichain/backend.py
@@ -1,5 +1,6 @@
import os
import subprocess
import time
from dataclasses import dataclass
from types import TracebackType
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple
@@ -17,6 +18,12 @@ class Request:


class Backend:
    needs_request = True

    @property
    def description(self):
        return ""

    def run(self, request: Request) -> str:
        raise NotImplementedError

@@ -38,6 +45,13 @@ def run(self, request: Request) -> str:
        self.i += 1
        return self.answers[self.i % len(self.answers)]

    def run_stream(self, request: Request) -> str:
        self.i += 1
        result = self.answers[self.i % len(self.answers)]
        for c in result:
            yield c
            time.sleep(10)

    def __repr__(self) -> str:
        return f"Mocked Backend {self.answers}"

@@ -83,7 +97,7 @@ def run(self, request: Request) -> str:
        return str(toret)

    def __repr__(self) -> str:
        return f"Google Search Backend"
        return "Google Search Backend"


class Python(Backend):
@@ -139,7 +153,12 @@ def __repr__(self) -> str:


class OpenAIBase(Backend):
    def __init__(self, model: str = "text-davinci-003", max_tokens: int = 256, temperature: float = 0.0) -> None:
    def __init__(
        self,
        model: str = "text-davinci-003",
        max_tokens: int = 256,
        temperature: float = 0.0,
    ) -> None:
        self.model = model
        self.options = dict(
            model=model,
@@ -151,6 +170,32 @@ def __repr__(self) -> str:
        return f"OpenAI Backend {self.options}"


class OpenAIStream:
    def __init__(self, answers: List[str] = []):
        pass

    def run_stream(self, prompt):
        import openai

        self.api_key = os.environ.get("OPENAI_API_KEY")
        assert (
            self.api_key
        ), "Need an OPENAI_API_KEY. Get one here https://openai.com/api/"
        openai.api_key = self.api_key

        for chunk in openai.ChatCompletion.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            stream=True,
        ):
            content = chunk["choices"][0].get("delta", {}).get("content")
            if content is not None:
                yield content

    def __repr__(self) -> str:
        return "OpenAI Stream Backend"


class OpenAI(OpenAIBase):
    def run(self, request: Request) -> str:
        import manifest
(Remaining changed files in this commit are not rendered here.)
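
The new OpenAIStream backend exported from minichain can also be driven directly. A minimal usage sketch (assumed usage, not part of this commit; requires OPENAI_API_KEY in the environment):

from minichain import OpenAIStream

# Stream tokens from the chat model as they arrive.
backend = OpenAIStream()
for token in backend.run_stream("Summarize last night's Lakers game in one sentence."):
    print(token, end="", flush=True)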
