🤖 AI 实战•60 分钟•新手•Jun 19, 2026
1 小时上手 LangChain:构建你的第一个实用 AI 应用
60 分钟内构建一个功能完整的 AI 研究助手,能够总结网页文章并回答相关问题。
#ai#langchain#llm#python#chatbot
学完这一小时,你将拥有一个可工作的 AI 研究助手,它能够抓取任意网页文章、生成智能摘要,并使用 LangChain 和 OpenAI 回答内容相关问题。
🎯 成果展示
一个命令行研究助手,输入 URL 即可提取内容、创建智能摘要,并支持后续问答:
$ python research_assistant.py
Enter article URL: https://example.com/article
✅ Article processed: "The Future of AI"
📝 Summary: This article discusses emerging trends in artificial intelligence...
Ask a question (or 'quit'): What are the main benefits mentioned?
🤖 The article highlights three key benefits: automation of repetitive tasks...
⏱️ 时间分配
0–10min
环境搭建和 LangChain 安装
10–25min
构建网页爬虫和文本处理器
25–40min
创建摘要生成链
40–55min
添加带记忆的问答功能
55–60min
测试并部署你的助手
📋 前置条件
- 已安装 Python 3.8+
- OpenAI API 密钥(免费版即可)
- 基础的 Python 和命令行操作经验
- 任意文本编辑器或 IDE
第 1 步:搭建 LangChain 环境(0-10 分钟)
创建新项目目录并安装所需包:
# Create the project directory and move into it.
mkdir langchain-assistant
cd langchain-assistant
# Create a virtual environment in ./venv, then activate it for the current shell.
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
安装 LangChain 和依赖:
# faiss-cpu backs the FAISS vector store and tiktoken is needed by OpenAIEmbeddings (both used in step 4).
# NOTE: the imports in this tutorial (langchain.llms, langchain.chains.summarize, ...) follow
# LangChain's legacy module layout; if they fail to resolve, install an older langchain release.
pip install langchain openai requests beautifulsoup4 python-dotenv faiss-cpu tiktoken
创建环境变量文件:
# Write the API key placeholder to .env ('>' overwrites an existing .env file).
echo "OPENAI_API_KEY=your_api_key_here" > .env
将 your_api_key_here 替换为你从 platform.openai.com 获得的实际 API 密钥。
✅
Checkpoint
运行 python -c "import langchain; print('LangChain installed successfully!')" - 会发生什么?
第 2 步:构建网页爬虫(10-25 分钟)
创建 scraper.py 来抓取和清理网页内容:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
class WebScraper:
    """Download a web page and extract its title and main readable text.

    One ``requests.Session`` is created per scraper and reused for every
    request, so the custom User-Agent header is always sent.
    """

    # CSS selectors tried, in order, when looking for the article body.
    CONTENT_SELECTORS = ('article', 'main', '.content', '.post-content', '.entry-content')

    def __init__(self, timeout=10):
        """Create the HTTP session.

        Args:
            timeout: Seconds passed to ``session.get`` as the request timeout.
        """
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)'
        })

    def scrape_article(self, url):
        """Extract the main content of a web article.

        Args:
            url: Address of the page to download.

        Returns:
            A dict with the keys ``'title'``, ``'content'`` and ``'url'``.

        Raises:
            Exception: If downloading or parsing fails. The underlying error
                is chained as ``__cause__`` so the real traceback is kept.
        """
        try:
            response = self.session.get(url, timeout=self.timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            # Remove elements that carry no article text.
            for element in soup(['script', 'style', 'nav', 'footer', 'header']):
                element.decompose()

            content = self._extract_main_content(soup)

            title = soup.find('title')
            # Fall back when the tag is missing *or* present but empty.
            title_text = (title.get_text().strip() if title else "") or "Unknown Title"

            return {
                'title': title_text,
                'content': content,
                'url': url
            }
        except Exception as e:
            raise Exception(f"Failed to scrape {url}: {e}") from e

    def _extract_main_content(self, soup):
        """Return the text of the first matching content container.

        Each selector in ``CONTENT_SELECTORS`` is tried in order; if none
        matches, the text of ``<body>`` is used, or ``""`` when there is no
        body element.
        """
        for selector in self.CONTENT_SELECTORS:
            content_div = soup.select_one(selector)
            if content_div:
                return content_div.get_text(separator=' ', strip=True)

        # Fall back to the whole body.
        body = soup.find('body')
        return body.get_text(separator=' ', strip=True) if body else ""
测试你的爬虫:
# test_scraper.py
from scraper import WebScraper
# Smoke test: fetch one page and report what the scraper returned.
page_scraper = WebScraper()
fetched = page_scraper.scrape_article("https://example.com")
print(f"Title: {fetched['title']}")
content_size = len(fetched['content'])
print(f"Content length: {content_size} characters")
✅
Checkpoint
测试爬取一个简单的文章 URL - 是否能正常返回标题和内容?
第 3 步:创建摘要生成链(25-40 分钟)
创建 summarizer.py 来构建你的第一个 LangChain 链:
import os
from dotenv import load_dotenv
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
load_dotenv()
class ArticleSummarizer:
    """Summarize scraped articles with a LangChain map-reduce chain."""

    def __init__(self):
        """Set up the LLM client, the text splitter and the summary prompt."""
        api_key = os.getenv("OPENAI_API_KEY")
        self.llm = OpenAI(temperature=0.3, openai_api_key=api_key)
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=3000, chunk_overlap=200
        )

        # Custom prompt used to steer the model towards better summaries.
        self.prompt_template = (
            "\n"
            "Summarize this article section in a clear, concise way:\n"
            "{text}\n"
            "Focus on:\n"
            "- Main points and key insights\n"
            "- Important facts and data\n"
            "- Conclusions or recommendations\n"
            "Summary:\n"
        )
        self.prompt = PromptTemplate(
            template=self.prompt_template, input_variables=["text"]
        )

    def summarize_article(self, article_data):
        """Create a summary for a scraped article.

        Args:
            article_data: Dict with the keys ``'title'``, ``'content'`` and
                ``'url'``.

        Returns:
            A dict with ``'title'``, ``'url'``, ``'summary'``,
            ``'original_length'`` and ``'chunks_processed'``.

        Raises:
            ValueError: If the content is shorter than 100 characters.
        """
        body = article_data['content']
        min_length = 100
        if len(body) < min_length:
            raise ValueError("Article content too short to summarize")

        # Split the text into manageable chunks, one Document per chunk.
        documents = []
        for chunk in self.text_splitter.split_text(body):
            documents.append(Document(page_content=chunk))

        # The same prompt is used for the per-chunk and the combine step.
        summarize = load_summarize_chain(
            self.llm,
            chain_type="map_reduce",
            map_prompt=self.prompt,
            combine_prompt=self.prompt,
        )
        raw_summary = summarize.run(documents)

        return {
            'title': article_data['title'],
            'url': article_data['url'],
            'summary': raw_summary.strip(),
            'original_length': len(body),
            'chunks_processed': len(documents),
        }
测试摘要器:
# test_summary.py
from scraper import WebScraper
from summarizer import ArticleSummarizer
# End-to-end check: scrape one article, summarize it, and print the result.
page_scraper, article_summarizer = WebScraper(), ArticleSummarizer()
fetched = page_scraper.scrape_article("https://example.com/your-test-article")
result = article_summarizer.summarize_article(fetched)
print(f"Original: {result['original_length']} chars")
print(f"Summary: {result['summary']}")
第 4 步:添加带记忆的问答(40-55 分钟)
创建 qa_system.py 来处理关于文章的问题:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
class QASystem:
    """Conversational question answering over a single scraped article."""

    def __init__(self):
        """Create the LLM, embeddings and conversation memory.

        The chain and vector store stay ``None`` until ``setup_qa_chain``
        is called.
        """
        self.llm = OpenAI(temperature=0.1)
        self.embeddings = OpenAIEmbeddings()
        # output_key tells the memory which chain output to record. The chain
        # below returns both 'answer' and 'source_documents', and the memory
        # cannot pick one on its own.
        self.memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer",
        )
        self.qa_chain = None
        self.vectorstore = None

    def setup_qa_chain(self, article_data):
        """Build the retrieval chain for one article.

        Args:
            article_data: Dict whose ``'content'`` key holds the article text.

        Raises:
            ValueError: If the article content is empty or whitespace only.
        """
        content = article_data['content']
        if not content or not content.strip():
            raise ValueError("Article content is empty; nothing to index")

        # Split the article into chunks, one Document per chunk.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        texts = text_splitter.split_text(content)
        docs = [Document(page_content=text) for text in texts]

        # History from a previously loaded article must not leak into this one.
        self.memory.clear()

        # Vector store used for semantic search over the chunks.
        self.vectorstore = FAISS.from_documents(docs, self.embeddings)

        self.qa_chain = ConversationalRetrievalChain.from_llm(
            self.llm,
            retriever=self.vectorstore.as_retriever(search_kwargs={"k": 3}),
            memory=self.memory,
            return_source_documents=True
        )

    def ask_question(self, question):
        """Ask a question about the loaded article.

        Args:
            question: The question text.

        Returns:
            A dict with ``'answer'``, ``'sources'`` (number of source
            documents returned) and ``'confidence'`` (``'high'`` when at
            least two source documents came back, otherwise ``'medium'``).

        Raises:
            ValueError: If ``setup_qa_chain`` has not been called yet.
        """
        if not self.qa_chain:
            raise ValueError("QA system not initialized. Call setup_qa_chain first.")

        result = self.qa_chain({"question": question})
        source_count = len(result['source_documents'])

        return {
            'answer': result['answer'],
            'sources': source_count,
            'confidence': 'high' if source_count >= 2 else 'medium'
        }

    def reset_conversation(self):
        """Clear the conversation history."""
        self.memory.clear()
✅
Checkpoint
用一篇文章初始化问答系统并询问"这篇文章讲的是什么?" - 能得到相关回答吗?
第 5 步:打包发布(55-60 分钟)
创建主应用程序 research_assistant.py:
#!/usr/bin/env python3
from scraper import WebScraper
from summarizer import ArticleSummarizer
from qa_system import QASystem
def main():
    """Run the interactive command-line research assistant.

    Asks for an article URL, prints its summary, then answers questions
    about the article until the user enters a quit command.
    """
    print("🔬 AI Research Assistant")
    print("=" * 40)

    # Build the three components the workflow relies on.
    page_scraper = WebScraper()
    article_summarizer = ArticleSummarizer()
    qa = QASystem()

    exit_words = ('quit', 'exit', 'q')

    try:
        target_url = input("Enter article URL: ").strip()

        print("📥 Fetching article...")
        article = page_scraper.scrape_article(target_url)

        print("🤖 Generating summary...")
        digest = article_summarizer.summarize_article(article)

        short_title = digest['title'][:50]
        print(f"\n✅ Article processed: \"{short_title}...\"")
        print(f"📝 Summary ({digest['chunks_processed']} sections):")
        print(f"{digest['summary']}\n")

        print("🔧 Setting up Q&A system...")
        qa.setup_qa_chain(article)

        # Interactive loop: blank input is ignored, an exit word ends it.
        print("💬 Ask questions about the article (type 'quit' to exit):")
        while True:
            user_question = input("\nYour question: ").strip()
            if not user_question:
                continue
            if user_question.lower() in exit_words:
                break

            # A failed answer is reported but does not end the session.
            try:
                reply = qa.ask_question(user_question)
                print(f"🤖 {reply['answer']}")
                print(f" (Confidence: {reply['confidence']}, Sources: {reply['sources']})")
            except Exception as err:
                print(f"❌ Error answering question: {err}")

        print("\n👋 Thanks for using AI Research Assistant!")
    except KeyboardInterrupt:
        print("\n👋 Goodbye!")
    except Exception as err:
        print(f"❌ Error: {err}")


if __name__ == "__main__":
    main()
设为可执行并测试:
# Set the executable bit on the script, then start it with the Python interpreter.
chmod +x research_assistant.py
python research_assistant.py
🎉 你的 AI 研究助手已就绪!用一篇新闻文章或博客帖子测试,看它如何总结内容并回答你的问题。
🎁 进阶挑战
- 自定义提取:使用 PyPDF2 添加对 PDF 文件的支持,处理研究论文
- 网页界面:构建简单的 Flask/Streamlit 网页版而非命令行版本
- 导出功能:将摘要和问答记录保存为 markdown 文件以便后续查看
📚 下一步学什么
→
1 小时学会专业提示词:让你的 ChatGPT 输出质量提升 10 倍
掌握 8 个实战验证的提示词模式,将基础的 ChatGPT 对话转变为精准、专业的输出结果
60 min
🔗 扩展资源
- LangChain 官方文档 - 完整框架参考
- OpenAI API 指南 - API 使用和最佳实践
- 向量数据库详解 - 理解嵌入和搜索
- 提示工程指南 - 为 LLM 编写更好的提示
- LangChain Cookbook - 实际案例和模式