-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlangchain_helper.py
58 lines (41 loc) · 1.93 KB
/
langchain_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from langchain_community.document_loaders import YoutubeLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv
load_dotenv()
embeddings = OpenAIEmbeddings()
def create_db(video_url: str) -> FAISS:
loader = YoutubeLoader.from_youtube_url(
video_url, add_video_info=False
)
transcript = loader.load()
textsplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = textsplitter.split_documents(transcript)
db = FAISS.from_documents(docs, embeddings) # connection between the otained yt transcript data and the llm.
return db
video_url = 'https://www.youtube.com/watch?v=lG7Uxts9SXs&t=18s'
print(create_db(video_url))
def get_query_response(db, query, k=4):
docs = db.similarity_search(query, k=k)
docs_page_content = " ".join([d.page_content for d in docs])
llm = ChatOpenAI(model_name="gpt-3.5-turbo")
prompt = PromptTemplate(
input_variables=["question", "docs"],
template="""
You are a helpful assistant that that can answer questions about youtube videos
based on the video's transcript.
Answer the following question: {question}
By searching the following video transcript: {docs}
Only use the factual information from the transcript to answer the question.
If you feel like you don't have enough information to answer the question, say "I don't know".
Your answers should be verbose and detailed.
""",
)
chain = LLMChain(llm=llm, prompt=prompt)
response = chain.run(question=query, docs=docs_page_content)
response = response.replace("\n", "")
return response, docs