OpenJiuwen DeepSearch 技术教程

2026-03-16 10:41:32
113次阅读
0个评论

OpenJiuwen DeepSearch 技术教程

概述

deepsearch 是知识增强的高性能深度搜索和研究引擎,提供企业级 Agentic AI 搜索和研究能力,采用多 Agent 协作架构。

技术栈

  • 编程语言: Python 3.11
  • 核心框架: 基于 openJiuwen agent-core
  • 后端服务: FastAPI
  • 包管理器: uv
  • 版本: 0.2.0

核心特性

1. 基于模板的报告生成

支持 Markdown、HTML、Word、PDF 等多种输出格式,满足不同场景需求;既可以从示例报告中提取模板,也可以手工编写自定义模板。

支持格式

from openjiuwen_deepsearch import ReportGenerator

# 初始化报告生成器
generator = ReportGenerator(
    template_dir="./templates",
    output_format="markdown"  # markdown, html, word, pdf
)

# 从示例提取模板
template = await generator.extract_template(
    example_report="./examples/financial_report.md",
    template_name="financial_analysis"
)

# 使用模板生成报告
report = await generator.generate(
    template_name="financial_analysis",
    data={
        "title": "2024年Q1财务分析",
        "company": "ABC公司",
        "metrics": {...}
    }
)

# 导出为不同格式
await generator.export(report, "report.pdf")
await generator.export(report, "report.docx")
await generator.export(report, "report.html")

自定义模板

# templates/custom_report.md

# {{ title }}

## 摘要
{{ summary }}

## 主要发现
{% for finding in findings %}
- {{ finding }}
{% endfor %}

## 详细分析
{{ detailed_analysis }}

## 结论
{{ conclusion }}

## 参考资料
{% for ref in references %}
{{ loop.index }}. {{ ref.title }} - {{ ref.url }}
{% endfor %}

2. 知识增强检索

提供关键词搜索、向量搜索、图检索以及将三者结合的融合检索等多种策略,并可集成本地知识库与网络搜索,确保信息全面准确。

关键词搜索

from openjiuwen_deepsearch import KeywordSearch

# 关键词搜索引擎
keyword_search = KeywordSearch(
    backend="elasticsearch",
    index_name="knowledge_base"
)

results = await keyword_search.search(
    query="人工智能在医疗领域的应用",
    filters={
        "date_range": "2023-01-01:2024-12-31",
        "source_type": ["academic", "news"]
    },
    top_k=20
)

向量搜索

from openjiuwen_deepsearch import VectorSearch

# 向量搜索引擎
vector_search = VectorSearch(
    embedding_model="text-embedding-ada-002",
    vector_db="milvus",
    collection_name="documents"
)

results = await vector_search.search(
    query="机器学习算法优化",
    top_k=10,
    similarity_threshold=0.75
)

图检索

from openjiuwen_deepsearch import GraphSearch

# 知识图谱检索
graph_search = GraphSearch(
    neo4j_uri="bolt://localhost:7687",
    neo4j_user="neo4j",
    neo4j_password="password"
)

# 实体关系检索
results = await graph_search.search(
    entity="人工智能",
    relation_types=["应用领域", "相关技术", "发展趋势"],
    depth=2
)

融合检索

from openjiuwen_deepsearch import FusionRetrieval

# 融合检索器
fusion = FusionRetrieval(
    retrievers=[
        keyword_search,
        vector_search,
        graph_search
    ],
    fusion_strategy="rrf",  # Reciprocal Rank Fusion
    weights=[0.3, 0.5, 0.2]
)

results = await fusion.retrieve(
    query="深度学习在自然语言处理中的应用",
    top_k=30
)

本地知识库集成

from openjiuwen_deepsearch import LocalKnowledgeBase

# 本地知识库
local_kb = LocalKnowledgeBase(
    storage_path="./knowledge_base",
    index_type="faiss"
)

# 添加文档
await local_kb.add_documents([
    {"content": "文档内容1", "metadata": {...}},
    {"content": "文档内容2", "metadata": {...}}
])

# 检索
results = await local_kb.search("查询内容", top_k=10)

网络搜索集成

from openjiuwen_deepsearch import WebSearch

# 网络搜索
web_search = WebSearch(
    providers=["google", "bing"],
    api_keys={
        "google": "your-google-api-key",
        "bing": "your-bing-api-key"
    }
)

results = await web_search.search(
    query="最新AI技术进展",
    num_results=10,
    date_restrict="d7"  # 最近7天
)

3. 多 Agent 协作

四个专业 Agent(查询规划、信息收集、理解分析、报告生成)按顺序协同工作,前一个 Agent 的输出作为后一个 Agent 的输入,共同完成复杂研究任务。

查询规划 Agent

from openjiuwen_deepsearch.framework.openjiuwen.agent import QueryPlanningAgent

# 查询规划 Agent
planner = QueryPlanningAgent(
    name="query_planner",
    llm_config={
        "model": "gpt-4",
        "temperature": 0.3
    }
)

# 生成研究计划
plan = await planner.plan(
    research_question="分析人工智能对就业市场的影响",
    constraints={
        "time_limit": "30分钟",
        "depth": "comprehensive",
        "focus_areas": ["技术岗位", "制造业", "服务业"]
    }
)

print(plan.sub_queries)
# [
#   "AI对技术岗位的影响",
#   "AI对制造业的冲击",
#   "AI创造的新就业机会",
#   "未来就业趋势预测"
# ]

信息收集 Agent

from openjiuwen_deepsearch.framework.openjiuwen.agent import InformationCollectionAgent

# 信息收集 Agent
collector = InformationCollectionAgent(
    name="info_collector",
    retrievers=[fusion, web_search, local_kb]
)

# 收集信息
info = await collector.collect(
    sub_queries=plan.sub_queries,
    max_sources_per_query=10
)

print(f"收集了 {len(info.documents)} 个文档")
print(f"来源分布: {info.source_distribution}")

理解分析 Agent

from openjiuwen_deepsearch.framework.openjiuwen.agent import UnderstandingAnalysisAgent

# 理解分析 Agent
analyzer = UnderstandingAnalysisAgent(
    name="analyzer",
    llm_config={"model": "gpt-4"}
)

# 分析信息
analysis = await analyzer.analyze(
    documents=info.documents,
    research_question="分析人工智能对就业市场的影响",
    analysis_framework={
        "dimensions": ["影响程度", "时间范围", "行业分布"],
        "metrics": ["就业率变化", "薪资影响", "技能需求"]
    }
)

print(analysis.key_findings)
print(analysis.statistical_insights)

报告生成 Agent

from openjiuwen_deepsearch.framework.openjiuwen.agent import ReportGenerationAgent

# 报告生成 Agent
reporter = ReportGenerationAgent(
    name="reporter",
    template_engine=generator
)

# 生成报告
report = await reporter.generate(
    analysis=analysis,
    template="comprehensive_research",
    output_format="markdown"
)

print(report.content)

4. 交互式功能

用户反馈机制

from openjiuwen_deepsearch import InteractiveResearch

# 交互式研究
research = InteractiveResearch()

# 开始研究
session = await research.start(
    question="研究主题",
    enable_feedback=True
)

# 规划阶段反馈
plan = await research.get_plan(session.id)
user_feedback = await research.request_feedback(
    session.id,
    "请确认研究计划是否合理",
    plan.sub_queries
)

# 根据反馈调整
if user_feedback.needs_adjustment:
    await research.adjust_plan(session.id, user_feedback.suggestions)

# 继续执行
result = await research.continue(session.id)

协作修改

# 协作编辑报告
async def collaborative_edit(report, user_comments):
    """Apply users' methodology comments to a report and return it.

    Each comment whose ``section`` equals "methodology" triggers one call to
    ``revise_section`` with the report's current methodology text and the
    comment's ``suggestion``; the result replaces ``report.methodology``.
    Comments on any other section are ignored. The report is modified in
    place and the same object is returned.
    """
    # Lazy filter: comments are still visited one at a time, in order.
    methodology_notes = (
        note for note in user_comments if note.section == "methodology"
    )
    for note in methodology_notes:
        current_text = report.methodology
        report.methodology = await revise_section(current_text, note.suggestion)
    return report

快速开始

安装

# 克隆仓库
git clone https://gitcode.com/openjiuwen/deepsearch.git
cd deepsearch

# 安装依赖
uv sync

# 配置环境
cp .env.example .env
# 编辑 .env 填入 API 密钥

命令行使用

# 基础搜索
deepsearch search "搜索查询"

# 深度研究
deepsearch research "研究问题" \
  --depth comprehensive \
  --output report.md

# 交互式研究
deepsearch interactive \
  --question "研究主题" \
  --enable-feedback

Python API

import asyncio
from openjiuwen_deepsearch import DeepSearchEngine

async def main():
    """Run one comprehensive research job, save the report, print its stats."""
    # Engine setup: LLM settings plus the retriever names to enable.
    llm_settings = {
        "model": "gpt-4",
        "api_key": "your-api-key"
    }
    enabled_retrievers = ["keyword", "vector", "web"]
    engine = DeepSearchEngine(
        llm_config=llm_settings,
        retrievers=enabled_retrievers
    )

    # Execute the research request.
    research_request = {
        "question": "分析2024年AI行业发展趋势",
        "depth": "comprehensive",
        "output_format": "markdown",
    }
    result = await engine.research(**research_request)

    # Persist the report.
    report_path = "ai_trends_2024.md"
    result.save(report_path)

    # Summary statistics read from the result object.
    print(f"耗时: {result.duration}秒")
    print(f"检索文档: {result.documents_count}")
    print(f"引用来源: {result.citations_count}")

asyncio.run(main())

高级用法

自定义 Agent

from openjiuwen_deepsearch.framework.openjiuwen.agent import BaseAgent

class CustomAnalysisAgent(BaseAgent):
    """Example custom agent that analyzes only finance-domain documents."""

    async def execute(self, documents, research_question):
        """Return one insight per document whose metadata domain is "finance".

        Documents are processed one at a time, in input order.
        ``research_question`` is accepted but not used by this example.
        """
        # Lazy filter so each document is checked right before it is analyzed.
        finance_docs = (
            doc for doc in documents
            if doc.metadata.get("domain") == "finance"
        )
        return [await self.analyze_financial(doc) for doc in finance_docs]

    async def analyze_financial(self, document):
        """Build the finance insight record for ``document``."""
        # The `...` values are placeholders left for the reader to fill in.
        insight = {
            "document": document.id,
            "financial_metrics": {...},
            "trends": [...]
        }
        return insight

工作流定制

from openjiuwen_deepsearch import ResearchWorkflow

# Build a custom research workflow.
workflow = ResearchWorkflow()

# Register the stages. planner, collector, analyzer and reporter are the
# agent objects created in the earlier multi-agent examples.
workflow.add_step("planning", planner)
workflow.add_step("collection", collector)
workflow.add_step("analysis", analyzer)
workflow.add_step("generation", reporter)

# Conditional branch attached to the "collection" step: the condition is true
# when fewer than 10 documents were collected. Presumably true_branch /
# false_branch name the step to run next — confirm against the
# ResearchWorkflow API.
# NOTE(review): no step named "expand_search" is registered above; confirm it
# is built in, or add it with add_step before executing.
workflow.add_condition(
    "collection",
    condition=lambda result: len(result.documents) < 10,
    true_branch="expand_search",
    false_branch="analysis"
)

# Run the workflow. research_question is not defined in this snippet; assign
# the question string before this line.
result = await workflow.execute(research_question)

性能优化

from openjiuwen_deepsearch import ParallelRetrieval

# 并行检索
parallel_retrieval = ParallelRetrieval(
    retrievers=[keyword_search, vector_search, web_search],
    max_workers=5
)

# 并行执行
results = await parallel_retrieval.retrieve_all(
    queries=["查询1", "查询2", "查询3"],
    timeout=30
)

应用场景

1. 金融分析报告

# 金融研究
result = await engine.research(
    question="分析某公司2024年Q1财务表现",
    domain="finance",
    template="financial_analysis",
    data_sources=["财报", "新闻", "分析师报告"]
)

2. 学术和政策研究

# 学术研究
result = await engine.research(
    question="评估碳交易政策对制造业的影响",
    domain="academic",
    template="academic_paper",
    citation_style="apa",
    peer_reviewed_only=True
)

3. 多源验证研究

# 多源验证
result = await engine.research(
    question="验证某项声明的真实性",
    verification_mode=True,
    min_sources=5,
    cross_check=True
)

最佳实践

1. 明确研究范围

# 设置明确的研究边界
result = await engine.research(
    question="研究问题",
    scope={
        "time_range": "2023-2024",
        "geographic_focus": "中国",
        "industries": ["科技", "金融"],
        "exclude": ["社交媒体评论"]
    }
)

2. 分阶段执行

# 分阶段研究
session = await engine.start_research(question)

# 阶段1: 初步探索
exploration = await engine.explore(session.id)

# 阶段2: 深入分析
deep_analysis = await engine.deep_dive(
    session.id,
    focus_areas=exploration.promising_areas
)

# 阶段3: 报告生成
report = await engine.finalize(session.id)

3. 质量控制

# 质量检查
quality_check = await engine.quality_check(report)

if quality_check.score < 0.8:
    # 重新收集信息
    await engine.enhance_research(
        session.id,
        areas=quality_check.weak_areas
    )

相关资源

  • 代码仓库: https://gitcode.com/openjiuwen/deepsearch

总结

deepsearch 通过多 Agent 协作和知识增强检索,提供了强大的深度研究和报告生成能力。无论是金融分析、学术研究还是政策评估,都能生成高质量、可追溯的专业报告。

收藏00

登录 后评论。没有帐号? 注册 一个。

huqi

  • 0回答
  • 0粉丝
  • 0关注