diff --git a/recipes/weaviate/.gitignore b/recipes/weaviate/.gitignore new file mode 100644 index 0000000..ccf7d12 --- /dev/null +++ b/recipes/weaviate/.gitignore @@ -0,0 +1,267 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# SonarLint plugin
+.idea/sonarlint/
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
diff --git a/recipes/weaviate/demo_app.py b/recipes/weaviate/demo_app.py
new file mode 100644
index 0000000..30c5403
--- /dev/null
+++ b/recipes/weaviate/demo_app.py
@@ -0,0 +1,287 @@
+"""Movie Magic: a Streamlit demo that recommends movies using Weaviate search and RAG."""
+
+import streamlit as st
+import time
+import sys
+import os
+import base64
+from st_weaviate_connection import WeaviateConnection, WeaviateFilter
+from weaviate.classes.query import Filter
+
+# Constants
+ENV_VARS = ["WEAVIATE_URL", "WEAVIATE_API_KEY", "COHERE_API_KEY"]
+NUM_IMAGES_PER_ROW = 5
+SEARCH_LIMIT = 10
+
+# Search modes: name -> (description shown in the sidebar, alpha passed to the search calls).
+SEARCH_MODES = {
+    "Keyword": ("Keyword search (BM25) ranks documents based on the relative frequencies of search terms.", 0),
+    "Semantic": ("Semantic (vector) search ranks results based on their similarity to your search query.", 1),
+    "Hybrid": ("Hybrid search combines vector and BM25 searches to offer best-of-both-worlds search results.", 0.7),
+}
+
+
+# Functions
+def get_env_vars(env_vars):
+    """Read the required environment variables.
+
+    Args:
+        env_vars: Names of the environment variables to read.
+
+    Returns:
+        A dict mapping each name to its value.
+
+    Exits the script, after showing an error for each missing name, if any
+    variable is unset or empty.
+    """
+    values = {var: os.environ.get(var, "") for var in env_vars}
+    missing = [var for var, value in values.items() if not value]
+    # Report every missing variable before exiting so they can all be fixed at once.
+    for var in missing:
+        st.error(f"{var} not set", icon="🚨")
+    if missing:
+        sys.exit(f"{', '.join(missing)} not set")
+    return values
+
+
+def display_chat_messages():
+    """Render the message history stored in the session state."""
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+            images = message.get("images", [])
+            # Lay the images out in rows of NUM_IMAGES_PER_ROW columns.
+            for start in range(0, len(images), NUM_IMAGES_PER_ROW):
+                cols = st.columns(NUM_IMAGES_PER_ROW)
+                for col, image in zip(cols, images[start:start + NUM_IMAGES_PER_ROW]):
+                    col.image(image, width=200)
+
+
+def base64_to_image(base64_str):
+    """Wrap a base64 string in a PNG data URI."""
+    return f"data:image/png;base64,{base64_str}"
+
+
+def clean_input(input_text):
+    """Return the user input with single and double quotes removed."""
+    return input_text.replace('"', "").replace("'", "")
+
+
+def setup_sidebar():
+    """Render the sidebar.
+
+    Returns:
+        The selected search mode name and the selected year range.
+    """
+    with st.sidebar:
+        st.title("🎥🍿 Movie Magic")
+        st.subheader("The RAG Recommender")
+        st.markdown("Your Weaviate & AI powered movie recommender. Find the perfect film for any occasion. Just tell us what you're looking for!")
+        st.header("Settings")
+
+        mode = st.radio("Search Mode", options=list(SEARCH_MODES.keys()), index=2)
+        year_range = st.slider("Year range", min_value=1950, max_value=2024, value=(1990, 2024))
+        st.info(SEARCH_MODES[mode][0])
+        st.success("Connected to Weaviate", icon="💚")
+
+    return mode, year_range
+
+
+def setup_weaviate_connection(env_vars):
+    """Create the Weaviate connection from the given environment values."""
+    return st.connection(
+        "weaviate",
+        type=WeaviateConnection,
+        url=env_vars["WEAVIATE_URL"],
+        api_key=env_vars["WEAVIATE_API_KEY"],
+        additional_headers={"X-Cohere-Api-Key": env_vars["COHERE_API_KEY"]},
+    )
+
+
+def display_example_prompts():
+    """Show the example prompt buttons.
+
+    Returns:
+        True if an example button was clicked (its values are then stored in
+        the session state), False otherwise.
+    """
+    example_prompts = [
+        ("sci-fi adventure", "movie night with friends"),
+        ("romantic comedy", "date night"),
+        ("animated family film", "family viewing"),
+        ("classic thriller", "solo watching"),
+        ("historical drama", "educational evening"),
+        ("indie comedy-drama", "film club discussion"),
+    ]
+
+    example_prompts_help = [
+        "Search for sci-fi adventure movies suitable for a group viewing",
+        "Find romantic comedies perfect for a date night",
+        "Look for animated movies great for family entertainment",
+        "Discover classic thrillers for a solo movie night",
+        "Explore historical dramas for an educational movie experience",
+        "Find indie comedy-dramas ideal for film club discussions",
+    ]
+
+    st.markdown("---")
+    st.write("Select an example prompt or enter your own, then **click `Search`** to get recommendations.")
+
+    button_cols = st.columns(3)
+    button_cols_2 = st.columns(3)
+
+    for i, ((movie_type, occasion), help_text) in enumerate(zip(example_prompts, example_prompts_help)):
+        col = button_cols[i] if i < 3 else button_cols_2[i - 3]
+        if col.button(f"{movie_type} for a {occasion}", help=help_text):
+            st.session_state.example_movie_type = movie_type
+            st.session_state.example_occasion = occasion
+            return True
+    return False
+
+
+def _year_filter(filter_cls, year_range):
+    """Build an inclusive release_year range filter using the given filter class."""
+    return (
+        filter_cls.by_property("release_year").greater_or_equal(year_range[0]) &
+        filter_cls.by_property("release_year").less_or_equal(year_range[1])
+    )
+
+
+def perform_search(conn, movie_type, rag_prompt, year_range, mode):
+    """Run the search, show the raw results, then show the generated recommendation.
+
+    Every assistant message shown is also appended to the session history.
+
+    Args:
+        conn: Weaviate connection used for the query and the generation.
+        movie_type: Search query text.
+        rag_prompt: Prompt passed as the grouped task for generation.
+        year_range: (start, end) release years used to filter the results.
+        mode: Key of SEARCH_MODES selecting the alpha value.
+    """
+    alpha = SEARCH_MODES[mode][1]
+    df = conn.query(
+        "MovieDemo",
+        query=movie_type,
+        return_properties=["title", "tagline", "poster"],
+        filters=_year_filter(WeaviateFilter, year_range),
+        limit=SEARCH_LIMIT,
+        alpha=alpha,
+    )
+
+    if df is None or df.empty:
+        no_results = f"No movies found matching {movie_type} and using {mode}. Please try again."
+        with st.chat_message("assistant"):
+            st.write(no_results)
+        # Store the same text that was shown so the replayed history matches it.
+        st.session_state.messages.append({"role": "assistant", "content": no_results})
+        return
+
+    images = []
+    with st.chat_message("assistant"):
+        st.write("Raw search results.")
+        cols = st.columns(NUM_IMAGES_PER_ROW)
+        # Pick the column by row position rather than by DataFrame index label.
+        for position, (_, row) in enumerate(df.iterrows()):
+            col = cols[position % NUM_IMAGES_PER_ROW]
+            if row["poster"]:
+                poster = base64_to_image(row["poster"])
+                col.image(poster, width=200)
+                images.append(poster)
+            else:
+                col.write(f"No Image Available for: {row['title']}")
+        st.write("Now generating recommendation from these: ...")
+
+    st.session_state.messages.append(
+        {"role": "assistant", "content": "Raw search results. Generating recommendation from these: ...", "images": images}
+    )
+
+    with conn.client() as client:
+        collection = client.collections.get("MovieDemo")
+        response = collection.generate.hybrid(
+            query=movie_type,
+            filters=_year_filter(Filter, year_range),
+            limit=SEARCH_LIMIT,
+            alpha=alpha,
+            grouped_task=rag_prompt,
+            grouped_properties=["title", "tagline"],
+        )
+
+    # Guard against a missing generated text so that split() below cannot fail.
+    rag_response = response.generated or ""
+
+    with st.chat_message("assistant"):
+        message_placeholder = st.empty()
+        full_response = ""
+        # Reveal the text word by word, followed by a cursor character.
+        for chunk in rag_response.split():
+            full_response += chunk + " "
+            time.sleep(0.02)
+            message_placeholder.markdown(full_response + "▌")
+        message_placeholder.markdown(full_response)
+
+    st.session_state.messages.append(
+        {"role": "assistant", "content": "Recommendation from these search results: " + full_response}
+    )
+
+
+def main():
+    """Run the app: page setup, chat history, input widgets and search."""
+    st.title("🎥🍿 Movie Magic")
+
+    env_vars = get_env_vars(ENV_VARS)
+    conn = setup_weaviate_connection(env_vars)
+    mode, year_range = setup_sidebar()
+
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+        st.session_state.greetings = False
+
+    display_chat_messages()
+
+    # Greet only once; later runs replay the greeting from the stored history.
+    if not st.session_state.greetings:
+        with st.chat_message("assistant"):
+            intro = "👋 Welcome to Movie Magic! I'm your AI movie recommender. Tell me what kind of film you're in the mood for and the occasion, and I'll suggest some great options."
+            st.markdown(intro)
+            st.session_state.messages.append({"role": "assistant", "content": intro})
+            st.session_state.greetings = True
+
+    if "example_movie_type" not in st.session_state:
+        st.session_state.example_movie_type = ""
+    if "example_occasion" not in st.session_state:
+        st.session_state.example_occasion = ""
+
+    example_selected = display_example_prompts()
+
+    movie_type = clean_input(st.text_input(
+        "What movies are you looking for?",
+        value=st.session_state.example_movie_type,
+        placeholder="E.g., sci-fi adventure, romantic comedy"
+    ))
+
+    viewing_occasion = clean_input(st.text_input(
+        "What occasion is the movie for?",
+        value=st.session_state.example_occasion,
+        placeholder="E.g., movie night with friends, date night"
+    ))
+
+    if st.button("Search"):
+        if movie_type and viewing_occasion:
+            rag_prompt = f"Suggest one to two movies out of the following list, for a {viewing_occasion}. Give a concise yet fun and positive recommendation."
+            prompt = f"Searching for: {movie_type} for {viewing_occasion}"
+            with st.chat_message("user"):
+                st.markdown(prompt)
+            st.session_state.messages.append({"role": "user", "content": prompt})
+
+            perform_search(conn, movie_type, rag_prompt, year_range, mode)
+            st.rerun()
+        else:
+            # Explain why nothing happened instead of silently ignoring the click.
+            st.warning("Please enter both a movie type and an occasion before searching.")
+
+    if example_selected:
+        st.rerun()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/recipes/weaviate/requirements.txt b/recipes/weaviate/requirements.txt
new file mode 100644
index 0000000..683014d
--- /dev/null
+++ b/recipes/weaviate/requirements.txt
@@ -0,0 +1,58 @@
+altair==5.4.0
+annotated-types==0.7.0
+anyio==4.4.0
+attrs==24.2.0
+Authlib==1.3.1
+blinker==1.8.2
+cachetools==5.5.0
+certifi==2024.7.4
+cffi==1.17.0
+charset-normalizer==3.3.2
+click==8.1.7
+cryptography==43.0.0
+gitdb==4.0.11
+GitPython==3.1.43
+grpcio==1.65.5
+grpcio-health-checking==1.65.5
+grpcio-tools==1.65.5
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.0
+idna==3.7
+Jinja2==3.1.4
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+narwhals==1.4.2
+numpy==2.1.0
+packaging==24.1
+pandas==2.2.2
+pillow==10.4.0
+protobuf==5.27.3
+pyarrow==17.0.0
+pycparser==2.22
+pydantic==2.8.2
+pydantic_core==2.20.1
+pydeck==0.9.1
+Pygments==2.18.0
+python-dateutil==2.9.0.post0
+pytz==2024.1
+referencing==0.35.1
+requests==2.32.3
+rich==13.7.1
+rpds-py==0.20.0
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+st-weaviate-connection==0.1.0
+streamlit==1.37.1
+tenacity==8.5.0
+toml==0.10.2
+tornado==6.4.1
+typing_extensions==4.12.2
+tzdata==2024.1
+urllib3==2.2.2
+validators==0.33.0
+weaviate-client==4.7.1