它的底层基于阿里巴巴自研并开源的大规模分布式搜索引擎 Havenask(HA3)。下面看看如何使用:
1. 安装阿里云 HA3 引擎的 Python SDK(alibabacloud-ha3engine):
pip install alibabacloud-ha3engine
2. 核心配置代码(示例中用到的 docs 和 embeddings 需要事先准备好,分别是待入库的文本列表和用于向量化的 embedding 对象):
# 导入阿里云 OpenSearch 向量存储类及其配置类
from langchain.vectorstores import (
AlibabaCloudOpenSearch,
AlibabaCloudOpenSearchSettings,
)
# Connection and schema settings for the Alibaba Cloud OpenSearch instance.
# Every string value below is a placeholder describing what to supply;
# replace each one with the real value for your instance before running.
settings = AlibabaCloudOpenSearchSettings(
    endpoint="The endpoint of opensearch instance, You can find it from the console of Alibaba Cloud OpenSearch.",
    instance_id="The identify of opensearch instance, You can find it from the console of Alibaba Cloud OpenSearch.",
    datasource_name="The name of the data source specified when creating it.",
    username="The username specified when purchasing the instance.",
    password="The password specified when purchasing the instance.",
    embedding_index_name="The name of the vector attribute specified when configuring the instance attributes.",
    field_name_mapping={
        # Id field of the index document.
        "id": "id",
        # Text field of the index document.
        "document": "document",
        # Embedding (vector) field of the index document.
        "embedding": "embedding",
        # Metadata field of the index document; several may be specified.
        # The value holds the mapped field name and an operator separated
        # by a comma; the operator is used when executing a metadata
        # filter query.
        "metadata_x": "metadata_x,=",
    },
)
# Vectorize the texts and store them, producing the OpenSearch vector store.
# `docs` and `embeddings` are not defined in this snippet; they must already
# exist when this statement runs.
opensearch = AlibabaCloudOpenSearch.from_texts(
    texts=docs,
    embedding=embeddings,
    config=settings,
)
# Query: run a similarity search against the vector store built above.
query = "What did the president say about Ketanji Brown Jackson"
# NOTE: this rebinds `docs`, the same name that was passed to `from_texts`
# as the input texts; from here on it holds the search results instead.
docs = opensearch.similarity_search(query)
# Print the text content of the first returned result.
print(docs[0].page_content)