├── .gitignore ├── .idea ├── .gitignore ├── document-summarizer-few-shot.iml ├── inspectionProfiles │ └── profiles_settings.xml ├── modules.xml └── vcs.xml ├── README.md ├── app.py ├── langgraph.json ├── langtweet ├── __init__.py ├── agent.py └── loading.py ├── poetry.lock ├── pyproject.toml └── static └── agent_ui.png /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .ipynb_checkpoints 3 | .langgraph-data 4 | .ruff_cache 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /.idea/document-summarizer-few-shot.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LangTweet 2 | 3 | This is an example repository demonstrating the power of dynamic few-shot prompting. 
4 | We use LangSmith to serve a few-shot dataset that we use to prompt an LLM to generate tweets in a style we like. 5 | We use LangGraph to orchestrate the (relatively simple) graph. 6 | 7 | ![](./static/agent_ui.png) 8 | 9 | **Key Links:** 10 | - [YouTube Walkthrough](https://youtu.be/xFpTMU24eUg) 11 | - [Try out the graph here](https://smith.langchain.com/studio/thread?baseUrl=https://langtweet-0e7095f4dab057f6bbddc823a9eae0aa.default.us.langgraph.app) 12 | - [LangSmith](https://smith.langchain.com/) 13 | - [LangGraph](https://github.com/langchain-ai/langgraph) 14 | 15 | ## The graph 16 | 17 | The graph logic is very simple for now. 18 | 19 | First, we load the content of a given URL. 20 | You can find the logic for loading the content in `langtweet/loading.py`. 21 | 22 | After that, we pass the content to a prompt. 23 | This prompt contains some basic instructions, but more importantly, a few examples of similar past tweets. 24 | The logic for this can be found in `langtweet/agent.py`. 25 | 26 | ## The dynamic few-shot selection 27 | 28 | A key part of this application is using dynamic few-shot selection to help prompt the LLM. 29 | The prompt instructions for tweeting are pretty basic. 30 | It provides a bit of context, then tells the LLM to pay attention to the examples. 31 | Therefore, the examples are doing a lot of the heavy lifting here. 32 | 33 | We use [LangSmith](https://smith.langchain.com/) to manage and serve the dataset that we use as examples. 34 | 35 | ## The deployment 36 | 37 | We deploy the graph to LangGraph Cloud. 38 | This gives us a nice API to interact with, as well as a fun studio UI for trying out the graph. 39 | 40 | We've made this studio publicly accessible; you can use it [here](https://smith.langchain.com/studio/thread?baseUrl=https://langtweet-0e7095f4dab057f6bbddc823a9eae0aa.default.us.langgraph.app). 
41 | 42 | ## The feedback loop 43 | 44 | A key part of a system like this is a feedback loop to continue to gather examples that can be used in the future. 45 | To that end, we have provided a very simple Streamlit application for invoking the graph and then correcting it and giving feedback. 46 | This feedback automatically creates a new entry in the dataset, which can then be used in the future. 47 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from langgraph_sdk import get_client 3 | from functools import partial 4 | import asyncio 5 | from langsmith import Client 6 | 7 | 8 | ls_client = Client() 9 | 10 | 11 | async def _predict(url): 12 | client = get_client(url="http://localhost:64176") 13 | thread = await client.threads.create() 14 | value = None 15 | async for chunk in client.runs.stream( 16 | thread["thread_id"], "tweeter", input={"url": url}, stream_mode="values" 17 | ): 18 | if chunk.event == "values": 19 | value = chunk.data.get("tweet") 20 | content = chunk.data.get("content") 21 | return (value, content) 22 | 23 | 24 | def generate_tweet(url): 25 | async_func = partial( 26 | _predict, 27 | *[url], 28 | ) 29 | loop = asyncio.new_event_loop() 30 | contents = loop.run_until_complete(async_func()) 31 | return contents 32 | 33 | 34 | def call_api(tweet, context): 35 | ls_client.create_examples( 36 | inputs=[{"content": context}], 37 | outputs=[{"tweet": tweet}], 38 | dataset_id="f6320c32-a6ff-46f1-9cc4-1b65e3e14a07", 39 | ) 40 | 41 | 42 | st.title("Tweet Generator") 43 | 44 | url = st.text_input("Enter a URL:") 45 | 46 | if url: 47 | if "generated_tweet" not in st.session_state: 48 | t, c = generate_tweet(url) 49 | print(c) 50 | st.session_state.generated_tweet = t 51 | st.session_state.context = c 52 | 53 | tweet = st.text_area( 54 | "Generated Tweet:", 55 | value=st.session_state.generated_tweet, 56 | 
height=300, 57 | key="tweet_area", 58 | ) 59 | 60 | if st.button("Give Feedback"): 61 | # Get the current content of the text area 62 | current_tweet = st.session_state.tweet_area 63 | # Send to API 64 | call_api(current_tweet, st.session_state.context) 65 | 66 | st.markdown("---") 67 | st.markdown( 68 | "Enter a URL above to generate a tweet. You can edit the generated tweet and then give feedback." 69 | ) 70 | -------------------------------------------------------------------------------- /langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "python_version": "3.11", 3 | "dockerfile_lines": [], 4 | "dependencies": [ 5 | "." 6 | ], 7 | "graphs": { 8 | "tweeter": "./langtweet/agent.py:tweet_graph" 9 | }, 10 | "env": [ 11 | "OPENAI_API_KEY" 12 | ] 13 | } -------------------------------------------------------------------------------- /langtweet/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.0" 2 | -------------------------------------------------------------------------------- /langtweet/agent.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | 3 | from langchain_core.messages import SystemMessage, HumanMessage, AIMessage 4 | from langsmith import client 5 | 6 | 7 | from langchain_openai import ChatOpenAI 8 | from langgraph.graph import StateGraph, END 9 | 10 | from langtweet.loading import get_content 11 | 12 | langsmith_client = client.Client() 13 | 14 | 15 | class GraphInput(TypedDict): 16 | url: str 17 | 18 | 19 | class GraphOutput(TypedDict): 20 | tweet: str 21 | 22 | 23 | def get_contents(state): 24 | url = state["url"] 25 | content = get_content(url) 26 | return {"content": content} 27 | 28 | 29 | class DirectSummarizerState(TypedDict): 30 | url: str 31 | content: str 32 | tweet: str 33 | 34 | 35 | base_prompt = """You are Harrison Chase. 
You tweet highlighting information related to LangChain, your LLM company. 36 | You use emojis. You use exclamation points but are not overly enthusiastic. You never use hashtags. 37 | You sometimes make spelling mistakes. You are not overly formal. You are not "salesy". You are nice. 38 | 39 | When given an article, write a tweet about it. Make it relevant and specific to the article at hand. 40 | 41 | Pay attention to the examples below. These are good examples. Generate future tweets in the style of the tweets below.""" 42 | 43 | 44 | def write_tweet_from_article(state: DirectSummarizerState): 45 | 46 | examples = langsmith_client.similar_examples( 47 | {"content": state["content"]}, 48 | dataset_id="f6320c32-a6ff-46f1-9cc4-1b65e3e14a07", 49 | limit=5, 50 | ) 51 | 52 | messages = [SystemMessage(content=base_prompt)] 53 | for e in examples: 54 | messages.append(HumanMessage(content=e.inputs["content"])) 55 | messages.append(AIMessage(content=e.outputs["tweet"])) 56 | 57 | messages.append(HumanMessage(content=state["content"])) 58 | 59 | chain = ChatOpenAI(model_name="gpt-4o") 60 | 61 | tweet = chain.invoke(messages) 62 | return {"tweet": tweet.content} 63 | 64 | 65 | tweet_workflow = StateGraph(DirectSummarizerState, input=GraphInput, output=GraphOutput) 66 | tweet_workflow.add_node(get_contents) 67 | tweet_workflow.add_node(write_tweet_from_article) 68 | 69 | tweet_workflow.set_entry_point("get_contents") 70 | tweet_workflow.add_edge("get_contents", "write_tweet_from_article") 71 | tweet_workflow.add_edge("write_tweet_from_article", END) 72 | 73 | 74 | tweet_graph = tweet_workflow.compile() 75 | -------------------------------------------------------------------------------- /langtweet/loading.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pytube import YouTube 3 | from langchain_community.document_loaders import WebBaseLoader 4 | import requests 5 | 6 | import bs4 7 | 8 | 9 | def is_youtube_url(url): 
10 | # Regular expression pattern for YouTube URLs 11 | youtube_regex = ( 12 | r"(https?://)?(www\.)?" 13 | "(youtube|youtu|youtube-nocookie)\.(com|be)/" 14 | "(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})" 15 | ) 16 | 17 | youtube_regex_match = re.match(youtube_regex, url) 18 | return bool(youtube_regex_match) 19 | 20 | 21 | def is_medium_url(url): 22 | # Regular expression pattern for Medium URLs 23 | medium_regex = r"https?://medium\.com/" 24 | medium_regex_match = re.match(medium_regex, url) 25 | return bool(medium_regex_match) 26 | 27 | 28 | def is_substack_url(url): 29 | # Regular expression pattern for Substack URLs 30 | substack_regex = r"https?://[a-z]*.substack.com/" 31 | substack_regex_match = re.match(substack_regex, url) 32 | return bool(substack_regex_match) 33 | 34 | 35 | def is_github_url(url): 36 | match = re.match(r"https?://github.com/([^/]+)/([^/]+)", url) 37 | return bool(match) 38 | 39 | def is_linkedin_url(url): 40 | match = re.match(r"https?://www.linkedin.com/", url) 41 | return bool(match) 42 | 43 | 44 | def get_github_readme(url): 45 | # Extract owner and repo from the GitHub URL 46 | match = re.match(r"https?://github.com/([^/]+)/([^/]+)", url) 47 | 48 | owner, repo = match.groups() 49 | readme_files = ["README.md", "README.txt", "README", "Readme.md", "readme.md"] 50 | 51 | for branch in ["main", "master"]: 52 | for filename in readme_files: 53 | # Construct the raw content URL 54 | raw_url = ( 55 | f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{filename}" 56 | ) 57 | 58 | try: 59 | response = requests.get(raw_url) 60 | response.raise_for_status() # Raises an HTTPError for bad responses 61 | return response.text 62 | except requests.exceptions.HTTPError as e: 63 | print("ERROR") 64 | print(e) 65 | continue 66 | 67 | print("Could not easily find a README file") 68 | return "" 69 | 70 | 71 | def get_youtube_description(url): 72 | # Create a YouTube object 73 | yt = YouTube(url) 74 | 75 | return f"Title: 
{yt.title}\n\nDescription: {yt.description}" 76 | 77 | 78 | def get_article_content(url): 79 | # Get article content. Multiple blogging/newsletter websites 80 | # have a common structure where they store their content under 81 | #
<article> tag. 82 | # This method extracts the content of those websites. 83 | # E.g. Medium, Substack, LinkedIn newsletters 84 | # Can be extended for other domains as well, that store their 85 | # content under <article>
tag. 86 | loader = WebBaseLoader( 87 | web_paths=[url], 88 | bs_kwargs=dict( 89 | parse_only=bs4.SoupStrainer("article") 90 | ), # only elements in article tag 91 | ) 92 | 93 | docs = loader.load() 94 | return docs[0].page_content 95 | 96 | 97 | def get_content(url): 98 | print("get_content url: ", url) 99 | if is_youtube_url(url): 100 | return get_youtube_description(url) 101 | elif is_github_url(url): 102 | return get_github_readme(url) 103 | elif is_medium_url(url) or is_substack_url(url) or is_linkedin_url(url): 104 | return get_article_content(url) 105 | else: 106 | print("generic url") 107 | loader = WebBaseLoader(url) 108 | docs = loader.load() 109 | return docs[0].page_content 110 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "langtweet" 3 | version = "0.1.0" 4 | description = "Example of using dynamic few-shot prompting to tweet in a specific style." 5 | authors = [ 6 | "langchain-ai" 7 | ] 8 | packages = [ 9 | { include = "langtweet" }, 10 | ] 11 | 12 | [tool.poetry.dependencies] 13 | python = ">=3.9.0,<3.13" 14 | langgraph = "^0.2.0" 15 | langsmith= "^0.1.100" 16 | langchain_anthropic = "^0.1.0" 17 | langchain_core = "^0.2.0" 18 | langchain_openai = "^0.1.0" 19 | tavily-python = "^0.3.0" 20 | pytube = "*" 21 | beautifulsoup4 = "*" 22 | streamlit = "*" 23 | langchain_community = "^0.2.0" 24 | 25 | 26 | [build-system] 27 | requires = ["poetry-core"] 28 | build-backend = "poetry.core.masonry.api" -------------------------------------------------------------------------------- /static/agent_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/langtweet/55746c42bedcb906a13df0e2a439f9d074ed525f/static/agent_ui.png --------------------------------------------------------------------------------