OpenJiuwen DeepSearch 技术教程
2026-03-16 10:41:32
113次阅读
0个评论
OpenJiuwen DeepSearch 技术教程
概述
OpenJiuwen DeepSearch(下文简称 deepsearch)是一个知识增强的高性能深度搜索与研究引擎,采用多 Agent 协作架构,提供企业级的 Agentic AI 搜索和研究能力。
技术栈
- 编程语言: Python 3.11
- 核心框架: 基于 openJiuwen agent-core
- 后端服务: FastAPI
- 包管理器: uv
- 版本: 0.2.0
核心特性
1. 基于模板的报告生成
支持多种输出格式,满足不同场景需求。
支持格式
from openjiuwen_deepsearch import ReportGenerator
# 初始化报告生成器
generator = ReportGenerator(
template_dir="./templates",
output_format="markdown" # markdown, html, word, pdf
)
# 从示例提取模板
template = await generator.extract_template(
example_report="./examples/financial_report.md",
template_name="financial_analysis"
)
# 使用模板生成报告
report = await generator.generate(
template_name="financial_analysis",
data={
"title": "2024年Q1财务分析",
"company": "ABC公司",
"metrics": {...}
}
)
# 导出为不同格式
await generator.export(report, "report.pdf")
await generator.export(report, "report.docx")
await generator.export(report, "report.html")
自定义模板
# templates/custom_report.md
# {{ title }}
## 摘要
{{ summary }}
## 主要发现
{% for finding in findings %}
- {{ finding }}
{% endfor %}
## 详细分析
{{ detailed_analysis }}
## 结论
{{ conclusion }}
## 参考资料
{% for ref in references %}
{{ loop.index }}. {{ ref.title }} - {{ ref.url }}
{% endfor %}
2. 知识增强检索
多种检索策略,确保信息全面准确。
关键词搜索
from openjiuwen_deepsearch import KeywordSearch
# 关键词搜索引擎
keyword_search = KeywordSearch(
backend="elasticsearch",
index_name="knowledge_base"
)
results = await keyword_search.search(
query="人工智能在医疗领域的应用",
filters={
"date_range": "2023-01-01:2024-12-31",
"source_type": ["academic", "news"]
},
top_k=20
)
向量搜索
from openjiuwen_deepsearch import VectorSearch
# 向量搜索引擎
vector_search = VectorSearch(
embedding_model="text-embedding-ada-002",
vector_db="milvus",
collection_name="documents"
)
results = await vector_search.search(
query="机器学习算法优化",
top_k=10,
similarity_threshold=0.75
)
图检索
from openjiuwen_deepsearch import GraphSearch
# 知识图谱检索
graph_search = GraphSearch(
neo4j_uri="bolt://localhost:7687",
neo4j_user="neo4j",
neo4j_password="password"
)
# 实体关系检索
results = await graph_search.search(
entity="人工智能",
relation_types=["应用领域", "相关技术", "发展趋势"],
depth=2
)
融合检索
from openjiuwen_deepsearch import FusionRetrieval
# 融合检索器
fusion = FusionRetrieval(
retrievers=[
keyword_search,
vector_search,
graph_search
],
fusion_strategy="rrf", # Reciprocal Rank Fusion
weights=[0.3, 0.5, 0.2]
)
results = await fusion.retrieve(
query="深度学习在自然语言处理中的应用",
top_k=30
)
本地知识库集成
from openjiuwen_deepsearch import LocalKnowledgeBase
# 本地知识库
local_kb = LocalKnowledgeBase(
storage_path="./knowledge_base",
index_type="faiss"
)
# 添加文档
await local_kb.add_documents([
{"content": "文档内容1", "metadata": {...}},
{"content": "文档内容2", "metadata": {...}}
])
# 检索
results = await local_kb.search("查询内容", top_k=10)
网络搜索集成
from openjiuwen_deepsearch import WebSearch
# 网络搜索
web_search = WebSearch(
providers=["google", "bing"],
api_keys={
"google": "your-google-api-key",
"bing": "your-bing-api-key"
}
)
results = await web_search.search(
query="最新AI技术进展",
num_results=10,
date_restrict="d7" # 最近7天
)
3. 多 Agent 协作
四个专业 Agent(查询规划、信息收集、理解分析、报告生成)协同工作,完成复杂研究任务。
查询规划 Agent
from openjiuwen_deepsearch.framework.openjiuwen.agent import QueryPlanningAgent
# 查询规划 Agent
planner = QueryPlanningAgent(
name="query_planner",
llm_config={
"model": "gpt-4",
"temperature": 0.3
}
)
# 生成研究计划
plan = await planner.plan(
research_question="分析人工智能对就业市场的影响",
constraints={
"time_limit": "30分钟",
"depth": "comprehensive",
"focus_areas": ["技术岗位", "制造业", "服务业"]
}
)
print(plan.sub_queries)
# [
# "AI对技术岗位的影响",
# "AI对制造业的冲击",
# "AI创造的新就业机会",
# "未来就业趋势预测"
# ]
信息收集 Agent
from openjiuwen_deepsearch.framework.openjiuwen.agent import InformationCollectionAgent
# 信息收集 Agent
collector = InformationCollectionAgent(
name="info_collector",
retrievers=[fusion, web_search, local_kb]
)
# 收集信息
info = await collector.collect(
sub_queries=plan.sub_queries,
max_sources_per_query=10
)
print(f"收集了 {len(info.documents)} 个文档")
print(f"来源分布: {info.source_distribution}")
理解分析 Agent
from openjiuwen_deepsearch.framework.openjiuwen.agent import UnderstandingAnalysisAgent
# 理解分析 Agent
analyzer = UnderstandingAnalysisAgent(
name="analyzer",
llm_config={"model": "gpt-4"}
)
# 分析信息
analysis = await analyzer.analyze(
documents=info.documents,
research_question="分析人工智能对就业市场的影响",
analysis_framework={
"dimensions": ["影响程度", "时间范围", "行业分布"],
"metrics": ["就业率变化", "薪资影响", "技能需求"]
}
)
print(analysis.key_findings)
print(analysis.statistical_insights)
报告生成 Agent
from openjiuwen_deepsearch.framework.openjiuwen.agent import ReportGenerationAgent
# 报告生成 Agent
reporter = ReportGenerationAgent(
name="reporter",
template_engine=generator
)
# 生成报告
report = await reporter.generate(
analysis=analysis,
template="comprehensive_research",
output_format="markdown"
)
print(report.content)
4. 交互式功能
用户反馈机制
from openjiuwen_deepsearch import InteractiveResearch
# 交互式研究
research = InteractiveResearch()
# 开始研究
session = await research.start(
question="研究主题",
enable_feedback=True
)
# 规划阶段反馈
plan = await research.get_plan(session.id)
user_feedback = await research.request_feedback(
session.id,
"请确认研究计划是否合理",
plan.sub_queries
)
# 根据反馈调整
if user_feedback.needs_adjustment:
await research.adjust_plan(session.id, user_feedback.suggestions)
# 继续执行
result = await research.continue(session.id)
协作修改
# 协作编辑报告
async def collaborative_edit(report, user_comments):
    """Apply reviewer comments to a report and return it.

    Only comments whose ``section`` is ``"methodology"`` are acted on: the
    current methodology text and the comment's suggestion are passed to
    ``revise_section`` and the awaited result replaces ``report.methodology``.
    Comments for any other section are skipped.

    Args:
        report: Report object; its ``methodology`` attribute is read and
            overwritten.
        user_comments: Iterable of comment objects exposing ``section`` and
            ``suggestion`` attributes.

    Returns:
        The same ``report`` object that was passed in, mutated in place.
    """
    for comment in user_comments:
        if comment.section != "methodology":
            continue
        report.methodology = await revise_section(
            report.methodology,
            comment.suggestion,
        )
    return report
快速开始
安装
# 克隆仓库
git clone https://gitcode.com/openjiuwen/deepsearch.git
cd deepsearch
# 安装依赖
uv sync
# 配置环境
cp .env.example .env
# 编辑 .env 填入 API 密钥
命令行使用
# 基础搜索
deepsearch search "搜索查询"
# 深度研究
deepsearch research "研究问题" \
--depth comprehensive \
--output report.md
# 交互式研究
deepsearch interactive \
--question "研究主题" \
--enable-feedback
Python API
import asyncio
from openjiuwen_deepsearch import DeepSearchEngine
async def main():
    """Run one research task, save the report and print its statistics.

    Builds a ``DeepSearchEngine`` with the keyword, vector and web retrievers,
    awaits a comprehensive Markdown research run, writes the result to
    ``ai_trends_2024.md`` and prints the duration, document count and
    citation count reported on the result object.
    """
    # Initialise the engine.
    engine = DeepSearchEngine(
        llm_config={
            "model": "gpt-4",
            "api_key": "your-api-key",
        },
        retrievers=["keyword", "vector", "web"],
    )

    # Run the research.
    result = await engine.research(
        question="分析2024年AI行业发展趋势",
        depth="comprehensive",
        output_format="markdown",
    )

    # Save the report.
    result.save("ai_trends_2024.md")

    # Show the run statistics.
    print(f"耗时: {result.duration}秒")
    print(f"检索文档: {result.documents_count}")
    print(f"引用来源: {result.citations_count}")
asyncio.run(main())
高级用法
自定义 Agent
from openjiuwen_deepsearch.framework.openjiuwen.agent import BaseAgent
class CustomAnalysisAgent(BaseAgent):
    """Example custom agent that collects insights from finance documents."""

    async def execute(self, documents, research_question):
        """Analyze the finance-domain documents and return their insights.

        Args:
            documents: Iterable of documents; each must expose a ``metadata``
                mapping with a ``get`` method.
            research_question: Not used by this example implementation.

        Returns:
            A list holding one ``analyze_financial`` result for every document
            whose ``metadata["domain"]`` equals ``"finance"``, in input order.
        """
        insights = []
        for doc in documents:
            # Domain-specific analysis: only finance documents are handled.
            if doc.metadata.get("domain") == "finance":
                insight = await self.analyze_financial(doc)
                insights.append(insight)
        return insights

    async def analyze_financial(self, document):
        """Return the finance-specific analysis record for one document.

        The ``...`` values are placeholders left by the tutorial; replace them
        with real metrics and trends.
        """
        return {
            "document": document.id,
            "financial_metrics": {...},
            "trends": [...],
        }
工作流定制
from openjiuwen_deepsearch import ResearchWorkflow
# 自定义工作流
workflow = ResearchWorkflow()
# 添加步骤
workflow.add_step("planning", planner)
workflow.add_step("collection", collector)
workflow.add_step("analysis", analyzer)
workflow.add_step("generation", reporter)
# 添加条件分支
workflow.add_condition(
"collection",
condition=lambda result: len(result.documents) < 10,
true_branch="expand_search",
false_branch="analysis"
)
# 执行工作流
result = await workflow.execute(research_question)
性能优化
from openjiuwen_deepsearch import ParallelRetrieval
# 并行检索
parallel_retrieval = ParallelRetrieval(
retrievers=[keyword_search, vector_search, web_search],
max_workers=5
)
# 并行执行
results = await parallel_retrieval.retrieve_all(
queries=["查询1", "查询2", "查询3"],
timeout=30
)
应用场景
1. 金融分析报告
# 金融研究
result = await engine.research(
question="分析某公司2024年Q1财务表现",
domain="finance",
template="financial_analysis",
data_sources=["财报", "新闻", "分析师报告"]
)
2. 学术和政策研究
# 学术研究
result = await engine.research(
question="评估碳交易政策对制造业的影响",
domain="academic",
template="academic_paper",
citation_style="apa",
peer_reviewed_only=True
)
3. 多源验证研究
# 多源验证
result = await engine.research(
question="验证某项声明的真实性",
verification_mode=True,
min_sources=5,
cross_check=True
)
最佳实践
1. 明确研究范围
# 设置明确的研究边界
result = await engine.research(
question="研究问题",
scope={
"time_range": "2023-2024",
"geographic_focus": "中国",
"industries": ["科技", "金融"],
"exclude": ["社交媒体评论"]
}
)
2. 分阶段执行
# 分阶段研究
session = await engine.start_research(question)
# 阶段1: 初步探索
exploration = await engine.explore(session.id)
# 阶段2: 深入分析
deep_analysis = await engine.deep_dive(
session.id,
focus_areas=exploration.promising_areas
)
# 阶段3: 报告生成
report = await engine.finalize(session.id)
3. 质量控制
# 质量检查
quality_check = await engine.quality_check(report)
if quality_check.score < 0.8:
# 重新收集信息
await engine.enhance_research(
session.id,
areas=quality_check.weak_areas
)
相关资源
总结
deepsearch 通过多 Agent 协作和知识增强检索,提供了强大的深度研究和报告生成能力。无论是金融分析、学术研究还是政策评估,都能生成高质量、可追溯的专业报告。
00
- 0回答
- 0粉丝
- 0关注
