├── .gitignore
├── .idea
├── .gitignore
├── document-summarizer-few-shot.iml
├── inspectionProfiles
│ └── profiles_settings.xml
├── modules.xml
└── vcs.xml
├── README.md
├── app.py
├── langgraph.json
├── langtweet
├── __init__.py
├── agent.py
└── loading.py
├── poetry.lock
├── pyproject.toml
└── static
└── agent_ui.png
/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | .ipynb_checkpoints
3 | .langgraph-data
4 | .ruff_cache
5 | .DS_Store
6 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/.idea/document-summarizer-few-shot.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LangTweet
2 |
3 | This is an example repository demonstrating the power of dynamic few-shot prompting.
4 | We use LangSmith to serve a few-shot dataset that we use to prompt an LLM to generate tweets in a style we like.
5 | We use LangGraph to orchestrate the (relatively simple) graph.
6 |
7 | 
8 |
9 | **Key Links:**
10 | - [YouTube Walkthrough](https://youtu.be/xFpTMU24eUg)
11 | - [Try out the graph here](https://smith.langchain.com/studio/thread?baseUrl=https://langtweet-0e7095f4dab057f6bbddc823a9eae0aa.default.us.langgraph.app)
12 | - [LangSmith](https://smith.langchain.com/)
13 | - [LangGraph](https://github.com/langchain-ai/langgraph)
14 |
15 | ## The graph
16 |
17 | The graph logic is very simple for now.
18 |
19 | First, we load the content of a given url.
20 | You can find the logic for loading the content in `langtweet/loading.py`.
21 |
22 | After that, we pass the content to a prompt.
23 | This prompt contains some basic instructions, but more importantly a few examples of similar tweets in the past.
24 | The logic for this can be found in `langtweet/agent.py`
25 |
26 | ## The dynamic few-shot selection
27 |
28 | A key part of this application is using dynamic few-shot selection to help construct the prompt.
29 | The prompt instructions for tweeting are pretty basic.
30 | It provides a bit of context then tells the LLM to pay attention to the examples.
31 | Therefore, the examples are doing a lot of the lifting here.
32 |
33 | We use [LangSmith](https://smith.langchain.com/) to manage and serve the dataset that we use as examples.
34 |
35 | ## The deployment
36 |
37 | We deploy the graph to LangGraph Cloud.
38 | This gives us a nice API to interact with, as well as a fun studio UI for trying out the graph.
39 |
40 | We've made this studio publicly accessible; you can use it [here](https://smith.langchain.com/studio/thread?baseUrl=https://langtweet-0e7095f4dab057f6bbddc823a9eae0aa.default.us.langgraph.app).
41 |
42 | ## The feedback loop
43 |
44 | A key part of a system like this is a feedback loop to continue to gather examples that can be used in the future.
45 | To that end, we have provided a very simple Streamlit application for invoking the graph and then correcting it and giving feedback.
46 | This feedback automatically creates a new entry in the dataset, which can then be used in the future.
47 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from langgraph_sdk import get_client
3 | from functools import partial
4 | import asyncio
5 | from langsmith import Client
6 |
7 |
8 | ls_client = Client()
9 |
10 |
async def _predict(url, base_url="http://localhost:64176"):
    """Run the "tweeter" graph for *url* and return (tweet, content).

    Creates a fresh thread on the LangGraph server, streams state values,
    and keeps the last observed "tweet" and "content" fields.

    Args:
        url: Article/video/repo URL passed as graph input.
        base_url: LangGraph server address (default is the local dev
            server port previously hard-coded here).

    Returns:
        Tuple of (tweet text or None, loaded page content or None).
    """
    client = get_client(url=base_url)
    thread = await client.threads.create()
    # Initialize BOTH results: the original left `content` unbound, so a
    # stream with no "values" event raised NameError at the return.
    tweet = None
    content = None
    async for chunk in client.runs.stream(
        thread["thread_id"], "tweeter", input={"url": url}, stream_mode="values"
    ):
        if chunk.event == "values":
            tweet = chunk.data.get("tweet")
            content = chunk.data.get("content")
    return (tweet, content)
22 |
23 |
def generate_tweet(url):
    """Synchronously run the tweet graph for *url*.

    Bridges Streamlit's synchronous script flow to the async `_predict`.

    Returns:
        (tweet, content) tuple from `_predict`.
    """
    # asyncio.run creates, runs, and *closes* an event loop. The previous
    # new_event_loop()/run_until_complete() pair never closed its loop,
    # leaking one loop per call; the partial(*[url]) wrapper was redundant.
    return asyncio.run(_predict(url))
32 |
33 |
def call_api(tweet, context):
    """Record a (content -> tweet) pair as a new few-shot example.

    The example is appended to the LangSmith dataset that the agent
    samples from, closing the feedback loop.
    """
    example_inputs = [{"content": context}]
    example_outputs = [{"tweet": tweet}]
    ls_client.create_examples(
        inputs=example_inputs,
        outputs=example_outputs,
        dataset_id="f6320c32-a6ff-46f1-9cc4-1b65e3e14a07",
    )
40 |
41 |
# --- Streamlit UI -----------------------------------------------------------
# Flow: user enters a URL -> the graph generates a tweet (cached in session
# state so widget-triggered reruns don't re-invoke the graph) -> the user may
# edit the tweet and submit it as a corrected few-shot example.
st.title("Tweet Generator")

url = st.text_input("Enter a URL:")

if url:
    # Streamlit reruns this whole script on every interaction; only call
    # the graph the first time a URL is present in this session.
    if "generated_tweet" not in st.session_state:
        t, c = generate_tweet(url)
        print(c)
        st.session_state.generated_tweet = t
        st.session_state.context = c

    tweet = st.text_area(
        "Generated Tweet:",
        value=st.session_state.generated_tweet,
        height=300,
        key="tweet_area",
    )

    if st.button("Give Feedback"):
        # Get the current (possibly user-edited) content of the text area
        current_tweet = st.session_state.tweet_area
        # Send the corrected tweet + original content to LangSmith as an example
        call_api(current_tweet, st.session_state.context)

st.markdown("---")
st.markdown(
    "Enter a URL above to generate a tweet. You can edit the generated tweet and then give feedback."
)
70 |
--------------------------------------------------------------------------------
/langgraph.json:
--------------------------------------------------------------------------------
1 | {
2 | "python_version": "3.11",
3 | "dockerfile_lines": [],
4 | "dependencies": [
5 | "."
6 | ],
7 | "graphs": {
8 | "tweeter": "./langtweet/agent.py:tweet_graph"
9 | },
10 | "env": [
11 | "OPENAI_API_KEY"
12 | ]
13 | }
--------------------------------------------------------------------------------
/langtweet/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.0"
2 |
--------------------------------------------------------------------------------
/langtweet/agent.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict
2 |
3 | from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
4 | from langsmith import client
5 |
6 |
7 | from langchain_openai import ChatOpenAI
8 | from langgraph.graph import StateGraph, END
9 |
10 | from langtweet.loading import get_content
11 |
# Shared LangSmith client used for similar-example retrieval (reads
# credentials from the environment).
langsmith_client = client.Client()
13 |
14 |
class GraphInput(TypedDict):
    """Public input schema of the compiled graph."""

    # URL of the article/video/repo to tweet about.
    url: str
17 |
18 |
class GraphOutput(TypedDict):
    """Public output schema of the compiled graph."""

    # The generated tweet text.
    tweet: str
21 |
22 |
def get_contents(state):
    """Graph node: load the page content for the state's URL."""
    return {"content": get_content(state["url"])}
27 |
28 |
class DirectSummarizerState(TypedDict):
    """Full internal graph state; each node reads/writes a subset of keys."""

    # Input URL (mirrors GraphInput).
    url: str
    # Raw page/video/README text produced by get_contents.
    content: str
    # Generated tweet (mirrors GraphOutput).
    tweet: str
33 |
34 |
# System prompt for the tweet writer. Deliberately light on instructions:
# the dynamically selected few-shot examples carry most of the style signal.
base_prompt = """You are Harrison Chase. You tweet highlighting information related to LangChain, your LLM company.
You use emojis. You use exclamation points but are not overly enthusiastic. You never use hashtags.
You sometimes make spelling mistakes. You are not overly formal. You are not "salesy". You are nice.

When given an article, write a tweet about it. Make it relevant and specific to the article at hand.

Pay attention to the examples below. These are good examples. Generate future tweets in the style of the tweets below."""
42 |
43 |
def write_tweet_from_article(state: DirectSummarizerState):
    """Graph node: draft a tweet for the loaded content.

    Pulls the most similar prior (content, tweet) pairs from the LangSmith
    dataset and presents them as few-shot chat turns before asking the
    model for a new tweet.
    """
    similar = langsmith_client.similar_examples(
        {"content": state["content"]},
        dataset_id="f6320c32-a6ff-46f1-9cc4-1b65e3e14a07",
        limit=5,
    )

    # System instructions, then each example as a human/assistant exchange,
    # then the new article as the final human turn.
    messages = [SystemMessage(content=base_prompt)]
    for example in similar:
        messages.extend(
            [
                HumanMessage(content=example.inputs["content"]),
                AIMessage(content=example.outputs["tweet"]),
            ]
        )
    messages.append(HumanMessage(content=state["content"]))

    llm = ChatOpenAI(model_name="gpt-4o")
    response = llm.invoke(messages)
    return {"tweet": response.content}
63 |
64 |
# Wire the two-node pipeline: load content -> write tweet.
tweet_workflow = StateGraph(DirectSummarizerState, input=GraphInput, output=GraphOutput)
tweet_workflow.add_node(get_contents)
tweet_workflow.add_node(write_tweet_from_article)

tweet_workflow.set_entry_point("get_contents")
tweet_workflow.add_edge("get_contents", "write_tweet_from_article")
tweet_workflow.add_edge("write_tweet_from_article", END)


# Compiled graph exported for LangGraph Cloud (referenced from langgraph.json).
tweet_graph = tweet_workflow.compile()
75 |
--------------------------------------------------------------------------------
/langtweet/loading.py:
--------------------------------------------------------------------------------
1 | import re
2 | from pytube import YouTube
3 | from langchain_community.document_loaders import WebBaseLoader
4 | import requests
5 |
6 | import bs4
7 |
8 |
def is_youtube_url(url):
    """Return True if *url* looks like a YouTube video link.

    Matches youtube.com / youtu.be / youtube-nocookie.com in watch, embed,
    /v/, and short-link forms with an 11-character video id.
    """
    # All segments are raw strings now: the original mixed raw and plain
    # literals, so escapes like "\." / "\?" were invalid escape sequences
    # (a DeprecationWarning, and a SyntaxWarning on Python 3.12+).
    youtube_regex = (
        r"(https?://)?(www\.)?"
        r"(youtube|youtu|youtube-nocookie)\.(com|be)/"
        r"(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})"
    )
    return bool(re.match(youtube_regex, url))
19 |
20 |
def is_medium_url(url):
    """Return True if *url* is hosted on medium.com."""
    return bool(re.match(r"https?://medium\.com/", url))
26 |
27 |
def is_substack_url(url):
    """Return True if *url* is a *.substack.com link.

    The previous pattern left the dots unescaped, so "." matched any
    character (e.g. "https://foo.substackxcom/" slipped through).
    """
    substack_regex = r"https?://[a-z]*\.substack\.com/"
    return bool(re.match(substack_regex, url))
33 |
34 |
def is_github_url(url):
    """Return True if *url* points at a github.com owner/repo path."""
    # Dot escaped: the original "github.com" let "." match any character.
    return bool(re.match(r"https?://github\.com/([^/]+)/([^/]+)", url))
38 |
def is_linkedin_url(url):
    """Return True if *url* is a www.linkedin.com link."""
    # Dots escaped: the original's bare "." matched any character.
    return bool(re.match(r"https?://www\.linkedin\.com/", url))
42 |
43 |
def get_github_readme(url):
    """Fetch the README text for a GitHub repository URL.

    Tries raw.githubusercontent.com for common README filenames on the
    "main" and "master" branches and returns the first successful fetch,
    or "" if none is found (or *url* is not a GitHub repo URL).
    """
    # Extract owner and repo from the GitHub URL (dot escaped in the host).
    match = re.match(r"https?://github\.com/([^/]+)/([^/]+)", url)
    if match is None:
        # Defensive: callers gate on is_github_url, but don't crash if not.
        print("Could not easily find a README file")
        return ""

    owner, repo = match.groups()
    readme_files = ["README.md", "README.txt", "README", "Readme.md", "readme.md"]

    for branch in ["main", "master"]:
        for filename in readme_files:
            # Construct the raw content URL. The original interpolated a
            # literal placeholder here instead of the candidate filename,
            # leaving the readme_files loop dead and every request a 404.
            raw_url = (
                f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{filename}"
            )

            try:
                # Timeout so a stalled fetch can't hang the graph forever.
                response = requests.get(raw_url, timeout=10)
                response.raise_for_status()  # Raises an HTTPError for bad responses
                return response.text
            except requests.exceptions.RequestException as e:
                # Covers HTTP errors, timeouts, and connection failures;
                # keep trying the remaining branch/filename candidates.
                print("ERROR")
                print(e)
                continue

    print("Could not easily find a README file")
    return ""
69 |
70 |
def get_youtube_description(url):
    """Return the video's title and description, formatted for the prompt."""
    video = YouTube(url)
    return f"Title: {video.title}\n\nDescription: {video.description}"
76 |
77 |
def get_article_content(url):
    """Extract the text of an article-style page.

    Several blogging/newsletter sites (e.g. Medium, Substack, LinkedIn
    newsletters) keep their body text under an <article> tag; parsing only
    that tag skips navigation and boilerplate. Can be extended to other
    domains that use the same structure.

    Returns:
        The article text, or "" when the page has no <article> content
        (the original indexed docs[0] and raised IndexError in that case).
    """
    loader = WebBaseLoader(
        web_paths=[url],
        bs_kwargs=dict(
            parse_only=bs4.SoupStrainer("article")
        ),  # only elements in article tag
    )

    docs = loader.load()
    if not docs:
        return ""
    return docs[0].page_content
95 |
96 |
def get_content(url):
    """Dispatch to a source-specific loader based on the URL's host."""
    print("get_content url: ", url)
    if is_youtube_url(url):
        return get_youtube_description(url)
    if is_github_url(url):
        return get_github_readme(url)
    if is_medium_url(url) or is_substack_url(url) or is_linkedin_url(url):
        return get_article_content(url)
    # Fallback: load the whole page without <article> filtering.
    print("generic url")
    docs = WebBaseLoader(url).load()
    return docs[0].page_content
110 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "langtweet"
3 | version = "0.1.0"
4 | description = "Example of using dynamic few-shot prompting to tweet in a specific style."
5 | authors = [
6 | "langchain-ai"
7 | ]
8 | packages = [
9 | { include = "langtweet" },
10 | ]
11 |
12 | [tool.poetry.dependencies]
13 | python = ">=3.9.0,<3.13"
14 | langgraph = "^0.2.0"
15 | langsmith= "^0.1.100"
16 | langchain_anthropic = "^0.1.0"
17 | langchain_core = "^0.2.0"
18 | langchain_openai = "^0.1.0"
19 | tavily-python = "^0.3.0"
20 | pytube = "*"
21 | beautifulsoup4 = "*"
22 | streamlit = "*"
23 | langchain_community = "^0.2.0"
24 |
25 |
26 | [build-system]
27 | requires = ["poetry-core"]
28 | build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/static/agent_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/langchain-ai/langtweet/55746c42bedcb906a13df0e2a439f9d074ed525f/static/agent_ui.png
--------------------------------------------------------------------------------