ベクトル・データベースPineconeのサービスを提供しているPinecone Systems Inc.が、RAG(Retrieval Augmented Generation)の実装を容易にするCanopyというフレームワークをオープン・ソースで提供しています。
Introducing Canopy: An easy, free, and flexible RAG framework powered by Pineconehttps://www.pinecone.io/blog/canopy-rag-framework/
sudo dnf -y install python3.11 python3.11-pip nginx
[opc@canopy-rag ~]$ sudo dnf -y install python3.11 python3.11-pip nginx
Ksplice for Oracle Linux 9 (x86_64) 385 kB/s | 158 kB 00:00
Oracle Linux 9 OCI Included Packages (x86_64) 24 MB/s | 59 MB 00:02
Oracle Linux 9 BaseOS Latest (x86_64) 14 MB/s | 17 MB 00:01
Oracle Linux 9 Application Stream Packages (x86_64) 19 MB/s | 26 MB 00:01
Oracle Linux 9 Addons (x86_64) 468 kB/s | 335 kB 00:00
Oracle Linux 9 UEK Release 7 (x86_64) 14 MB/s | 24 MB 00:01
Dependencies resolved.
========================================================================================
Package Arch Version Repository Size
========================================================================================
Installing:
nginx x86_64 1:1.20.1-14.0.1.el9_2.1 ol9_appstream 48 k
python3.11 x86_64 3.11.5-1.el9_3 ol9_appstream 30 k
python3.11-pip noarch 22.3.1-4.el9 ol9_appstream 4.3 M
Installing dependencies:
libnsl2 x86_64 2.0.0-1.el9 ol9_appstream 30 k
mpdecimal x86_64 2.5.1-3.el9 ol9_appstream 85 k
nginx-core x86_64 1:1.20.1-14.0.1.el9_2.1 ol9_appstream 587 k
nginx-filesystem noarch 1:1.20.1-14.0.1.el9_2.1 ol9_appstream 8.4 k
oracle-logos-httpd noarch 90.2-1.0.4.el9 ol9_baseos_latest 37 k
python3.11-libs x86_64 3.11.5-1.el9_3 ol9_appstream 12 M
python3.11-pip-wheel noarch 22.3.1-4.el9 ol9_appstream 1.4 M
python3.11-setuptools-wheel noarch 65.5.1-2.el9 ol9_appstream 712 k
Installing weak dependencies:
python3.11-setuptools noarch 65.5.1-2.el9 ol9_appstream 2.3 M
Transaction Summary
========================================================================================
Install 12 Packages
[中略]
Installed:
libnsl2-2.0.0-1.el9.x86_64
mpdecimal-2.5.1-3.el9.x86_64
nginx-1:1.20.1-14.0.1.el9_2.1.x86_64
nginx-core-1:1.20.1-14.0.1.el9_2.1.x86_64
nginx-filesystem-1:1.20.1-14.0.1.el9_2.1.noarch
oracle-logos-httpd-90.2-1.0.4.el9.noarch
python3.11-3.11.5-1.el9_3.x86_64
python3.11-libs-3.11.5-1.el9_3.x86_64
python3.11-pip-22.3.1-4.el9.noarch
python3.11-pip-wheel-22.3.1-4.el9.noarch
python3.11-setuptools-65.5.1-2.el9.noarch
python3.11-setuptools-wheel-65.5.1-2.el9.noarch
Complete!
[opc@canopy-rag ~]$
sudo dnf --enablerepo=ol9_developer_EPEL -y install certbot
[opc@canopy-rag ~]$ sudo dnf --enablerepo=ol9_developer_EPEL -y install certbot
Oracle Linux 9 EPEL Packages for Development (x86_64) 23 MB/s | 47 MB 00:02
Last metadata expiration check: 0:00:34 ago on Wed 29 Nov 2023 01:46:02 AM GMT.
Dependencies resolved.
========================================================================================
Package Arch Version Repository Size
========================================================================================
Installing:
certbot noarch 2.6.0-1.el9 ol9_developer_EPEL 25 k
Installing dependencies:
fontawesome-fonts noarch 1:4.7.0-13.el9 ol9_appstream 205 k
python3-acme noarch 2.6.0-1.el9 ol9_developer_EPEL 268 k
python3-certbot noarch 2.6.0-1.el9 ol9_developer_EPEL 1.0 M
python3-configargparse noarch 1.7-1.el9 ol9_developer_EPEL 56 k
python3-josepy noarch 1.13.0-1.el9 ol9_developer_EPEL 101 k
python3-parsedatetime noarch 2.6-5.el9 ol9_developer_EPEL 133 k
python3-pyrfc3339 noarch 1.1-11.el9 ol9_developer_EPEL 36 k
Transaction Summary
========================================================================================
Install 8 Packages
[中略]
Installed:
certbot-2.6.0-1.el9.noarch fontawesome-fonts-1:4.7.0-13.el9.noarch
python3-acme-2.6.0-1.el9.noarch python3-certbot-2.6.0-1.el9.noarch
python3-configargparse-1.7-1.el9.noarch python3-josepy-1.13.0-1.el9.noarch
python3-parsedatetime-2.6-5.el9.noarch python3-pyrfc3339-1.1-11.el9.noarch
Complete!
[opc@canopy-rag ~]$
sudo firewall-cmd --add-service=http
sudo firewall-cmd --add-service=https
sudo firewall-cmd --add-port=8000/tcp
sudo firewall-cmd --runtime-to-permanent
sudo firewall-cmd --reload
sudo firewall-cmd --list-all
[opc@canopy-rag ~]$ sudo firewall-cmd --add-service=http
success
[opc@canopy-rag ~]$ sudo firewall-cmd --add-service=https
success
[opc@canopy-rag ~]$ sudo firewall-cmd --add-port=8000/tcp
success
[opc@canopy-rag ~]$ sudo firewall-cmd --runtime-to-permanent
success
[opc@canopy-rag ~]$ sudo firewall-cmd --reload
success
[opc@canopy-rag ~]$ sudo firewall-cmd --list-all
public (active)
target: default
icmp-block-inversion: no
interfaces: ens3
sources:
services: dhcpv6-client http https ssh
ports: 8000/tcp
protocols:
forward: yes
masquerade: no
forward-ports:
source-ports:
icmp-blocks:
rich rules:
[opc@canopy-rag ~]$
SELinuxの設定で、Nginxによるリバース・プロキシを許可します。
sudo setsebool -P httpd_can_network_connect 1
[opc@canopy-rag ~]$ sudo setsebool -P httpd_can_network_connect 1
[opc@canopy-rag ~]$
[opc@canopy-rag ~]$ pip3.11 install canopy-sdk
Defaulting to user installation because normal site-packages is not writeable
Collecting canopy-sdk
Using cached canopy_sdk-0.2.0-py3-none-any.whl (72 kB)
Collecting fastapi<0.93.0,>=0.92.0
Using cached fastapi-0.92.0-py3-none-any.whl (56 kB)
Collecting gunicorn<22.0.0,>=21.2.0
Using cached gunicorn-21.2.0-py3-none-any.whl (80 kB)
Collecting jsonschema<5.0.0,>=4.2.0
Using cached jsonschema-4.20.0-py3-none-any.whl (84 kB)
Collecting openai<2.0.0,>=1.2.3
Using cached openai-1.3.6-py3-none-any.whl (220 kB)
Collecting pandas-stubs<3.0.0.0,>=2.0.3.230814
Using cached pandas_stubs-2.1.1.230928-py3-none-any.whl (153 kB)
Collecting pinecone-client<3.0.0,>=2.2.2
[中略]
Running setup.py install for wget ... done
Successfully installed aiobotocore-2.7.0 aiohttp-3.9.1 aioitertools-0.11.0 aiosignal-1.3.1 anyio-3.7.1 attrs-23.1.0 botocore-1.31.64 cachetools-5.3.2 canopy-sdk-0.2.0 certifi-2023.11.17 charset-normalizer-3.3.2 click-8.1.7 decorator-5.1.1 distro-1.8.0 dnspython-2.4.2 fastapi-0.92.0 frozenlist-1.4.0 fsspec-2023.10.0 gcsfs-2023.10.0 google-api-core-2.14.0 google-auth-2.23.4 google-auth-oauthlib-1.1.0 google-cloud-core-2.3.3 google-cloud-storage-2.13.0 google-crc32c-1.5.0 google-resumable-media-2.6.0 googleapis-common-protos-1.61.0 gunicorn-21.2.0 h11-0.14.0 httpcore-1.0.2 httpx-0.25.2 idna-3.6 jmespath-1.0.1 joblib-1.3.2 jsonschema-4.20.0 jsonschema-specifications-2023.11.1 loguru-0.7.2 mmh3-3.1.0 multidict-6.0.4 nltk-3.8.1 numpy-1.25.2 oauthlib-3.2.2 openai-1.3.6 packaging-23.2 pandas-2.1.3 pandas-stubs-2.0.3.230814 pinecone-client-2.2.4 pinecone-datasets-0.6.2 pinecone-text-0.7.0 prompt-toolkit-3.0.41 protobuf-4.25.1 pyarrow-11.0.0 pyasn1-0.5.1 pyasn1-modules-0.3.0 pydantic-1.10.13 python-dateutil-2.8.2 python-dotenv-1.0.0 pytz-2023.3.post1 pyyaml-6.0.1 referencing-0.31.0 regex-2023.10.3 requests-2.31.0 requests-oauthlib-1.3.1 rpds-py-0.13.1 rsa-4.9 s3fs-2023.10.0 six-1.16.0 sniffio-1.3.0 sse-starlette-1.8.2 starlette-0.25.0 tenacity-8.2.3 tiktoken-0.3.3 tqdm-4.66.1 types-jsonschema-4.20.0.0 types-pytz-2023.3.1.1 types-pyyaml-6.0.12.12 types-tqdm-4.66.0.5 typing-extensions-4.8.0 tzdata-2023.3 urllib3-2.0.7 uvicorn-0.20.0 wcwidth-0.2.12 wget-3.2 wrapt-1.16.0 yarl-1.9.3
[opc@canopy-rag ~]$
export PINECONE_API_KEY="<PINECONE_API_KEY>"
export PINECONE_ENVIRONMENT="<PINECONE_ENVIRONMENT>"
export OPENAI_API_KEY="<OPENAI_API_KEY>"
export INDEX_NAME="<INDEX_NAME>"
[opc@canopy-rag ~]$ canopy new
Canopy is going to create a new index: canopy--canopy-101
Do you want to continue? [y/N]: y
Success!
[opc@canopy-rag ~]$
sudo certbot certonly --standalone
[opc@canopy-rag ~]$ sudo certbot certonly --standalone
Saving debug log to /var/log/letsencrypt/letsencrypt.log
Enter email address (used for urgent renewal and security notices)
(Enter 'c' to cancel): <申請者のメール・アドレス>
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Please read the Terms of Service at
https://letsencrypt.org/documents/LE-SA-v1.3-September-21-2022.pdf. You must
agree in order to register with the ACME server. Do you agree?
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
(Y)es/(N)o: Y
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Would you be willing, once your first certificate is successfully issued, to
share your email address with the Electronic Frontier Foundation, a founding
partner of the Let's Encrypt project and the non-profit organization that
develops Certbot? We'd like to send you email about our work encrypting the web,
EFF news, campaigns, and ways to support digital freedom.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
(Y)es/(N)o: N
Account registered.
Please enter the domain name(s) you would like on your certificate (comma and/or
space separated) (Enter 'c' to cancel): <DNSに登録したホスト名>
Requesting a certificate for host-name
Successfully received certificate.
Certificate is saved at: /etc/letsencrypt/live/host-name/fullchain.pem
Key is saved at: /etc/letsencrypt/live/host-name/privkey.pem
This certificate expires on 2024-02-27.
These files will be updated when the certificate renews.
Certbot has set up a scheduled task to automatically renew this certificate in the background.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
If you like Certbot, please consider supporting our work by:
* Donating to ISRG / Let's Encrypt: https://letsencrypt.org/donate
* Donating to EFF: https://eff.org/donate-le
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
[opc@canopy-rag ~]$
server { | |
listen 443 ssl; | |
ssl_certificate /etc/letsencrypt/live/<ホスト名>/fullchain.pem; | |
ssl_certificate_key /etc/letsencrypt/live/<ホスト名>/privkey.pem; | |
server_name <ホスト名>; | |
root /usr/share/nginx/html; | |
index index.html; | |
location / { | |
proxy_pass http://127.0.0.1:8000/; | |
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; | |
proxy_set_header X-Forwarded-Proto $scheme; | |
proxy_set_header Host $http_host; | |
proxy_redirect off; | |
proxy_send_timeout 10; | |
proxy_read_timeout 60; | |
} | |
} |
[opc@canopy-rag ~]$ sudo systemctl start nginx
[opc@canopy-rag ~]$
[opc@canopy-rag ~]$ . mac.env
[opc@canopy-rag ~]$ canopy start
🚨 Note 🚨
For debugging only. To run the Canopy server in production run the command:
gunicorn canopy_server.app:app --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000 --workers <num_workers>
Starting Canopy server on 0.0.0.0:8000
INFO: Started server process [3900]
INFO: Waiting for application startup.
2023-11-29 03:25:43,526 - MainProcess - canopy_server.app [INFO ]: Did not find config file. Initializing engines with default configuration
INFO: Application startup complete.
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
手元のブラウザより、https://ホスト名/docsにアクセスします。HTTPSのリクエストはNginxが受け付けますがCanopyに転送されているため、CanopyのAPI定義情報が表示されます。
create table canopy_documents (
id number generated by default on null as identity
constraint canopy_documents_id_pk primary key,
source varchar2(80 char) not null,
text clob not null
);
続いて以下のコードを実行し、パッケージUTL_CANOPY_APIを作成します。CanopyへのRAGを使った問い合わせ(Completions APIの呼び出し)、Pineconeのインデックスの検索(Query API)、RAGの知識となるドキュメントの登録と削除(Upsert/Delete API)を呼び出すプロシージャを作成しています。
create or replace package utl_canopy_api | |
as | |
/** | |
* CanopyのCompletions APIを呼び出す。 | |
*/ | |
procedure completions( | |
p_user_content in clob | |
,p_stream in boolean default false | |
,p_canopy_url in varchar2 | |
,p_assistant_content out clob | |
,p_response out clob | |
); | |
/** | |
* CanopyのQuery APIを呼び出す。 | |
*/ | |
procedure query( | |
p_query_text in clob | |
,p_top_k in number default 3 | |
,p_namespace in varchar2 default '' | |
,p_max_tokens in number default 8192 | |
,p_canopy_url in varchar2 | |
,p_query_result out clob | |
,p_response out clob | |
); | |
/** | |
* CanopyのUpsertまたはDelete APIを呼び出す。 | |
* APEXのフォームから呼び出すことを想定しているため、Document IDなどは | |
* 多くても1つだけを処理する。 | |
*/ | |
procedure manage_document( | |
p_row_status in varchar2 | |
,p_id in number | |
,p_source in varchar2 | |
,p_text in clob | |
,p_batch_size in number default 200 | |
,p_canopy_url in varchar2 | |
,p_response out clob | |
); | |
end utl_canopy_api; | |
/ | |
create or replace package body utl_canopy_api | |
as | |
procedure completions( | |
p_user_content in clob | |
,p_stream in boolean | |
,p_canopy_url in varchar2 | |
,p_assistant_content out clob | |
,p_response out clob | |
) | |
as | |
l_request json_object_t; | |
l_messages json_array_t; | |
l_message json_object_t; | |
l_request_clob clob; | |
e_call_api_failed exception; | |
l_response_json json_object_t; | |
l_choices json_array_t; | |
begin | |
l_message := json_object_t(); | |
l_message.put('role','user'); | |
l_message.put('content', p_user_content); | |
l_messages := json_array_t(); | |
l_messages.append(l_message); | |
l_request := json_object_t(); | |
l_request.put('messages', l_messages); | |
l_request.put('model',''); | |
l_request.put('stream',false); | |
l_request_clob := l_request.to_clob(); | |
/* | |
* Canopyのcompletionsを呼び出す。 | |
*/ | |
apex_web_service.clear_request_headers; | |
apex_web_service.set_request_headers('Content-Type','application/json', p_reset => false); | |
apex_web_service.set_request_headers('Accept','application/json', p_reset => false); | |
p_response := apex_web_service.make_rest_request( | |
p_url => p_canopy_url || '/chat/completions' | |
,p_http_method => 'POST' | |
,p_body => l_request_clob | |
); | |
if apex_web_service.g_status_code <> 200 then | |
raise e_call_api_failed; | |
end if; | |
/* */ | |
l_response_json := json_object_t(p_response); | |
l_choices := l_response_json.get_array('choices'); | |
p_assistant_content := ''; | |
for i in 1..l_choices.get_size() | |
loop | |
l_message := treat(l_choices.get(i-1) as json_object_t).get_object('message'); | |
if l_message.get_string('role') = 'assistant' then | |
p_assistant_content := p_assistant_content || l_message.get_string('content'); | |
end if; | |
end loop; | |
end completions; | |
procedure query( | |
p_query_text in clob | |
,p_top_k in number | |
,p_namespace in varchar2 | |
,p_max_tokens in number | |
,p_canopy_url in varchar2 | |
,p_query_result out clob | |
,p_response out clob | |
) | |
as | |
l_request json_object_t; | |
l_queries json_array_t; | |
l_query json_object_t; | |
l_request_clob clob; | |
e_call_api_failed exception; | |
l_response_json json_object_t; | |
l_content clob; | |
l_snippets json_array_t; | |
l_snippet json_object_t; | |
begin | |
l_query := json_object_t(); | |
l_query.put('text', p_query_text); | |
l_query.put('namespace', p_namespace); | |
-- l_query.put('matadata_filter', '{}'); | |
l_query.put('top_k', p_top_k); | |
-- l_query.put('query_params', '{}'); | |
l_queries := json_array_t(); | |
l_queries.append(l_query); | |
l_request := json_object_t(); | |
l_request.put('queries', l_queries); | |
l_request.put('max_tokens', p_max_tokens); | |
l_request_clob := l_request.to_clob(); | |
/* | |
* Canopyのqueryを呼び出す。 | |
*/ | |
apex_web_service.clear_request_headers; | |
apex_web_service.set_request_headers('Content-Type','application/json', p_reset => false); | |
apex_web_service.set_request_headers('Accept','application/json', p_reset => false); | |
p_response := apex_web_service.make_rest_request( | |
p_url => p_canopy_url || '/context/query' | |
,p_http_method => 'POST' | |
,p_body => l_request_clob | |
); | |
if apex_web_service.g_status_code <> 200 then | |
raise e_call_api_failed; | |
end if; | |
/* */ | |
l_response_json := json_object_t(p_response); | |
l_content := l_response_json.get_string('content'); | |
l_queries := json_array_t(l_content); | |
p_query_result := ''; | |
for i in 1..l_queries.get_size() | |
loop | |
l_query := treat(l_queries.get(i-1) as json_object_t); | |
l_snippets := l_query.get_array('snippets'); | |
for j in 1..l_snippets.get_size() | |
loop | |
l_snippet := treat(l_snippets.get(j-1) as json_object_t); | |
p_query_result := p_query_result || 'Source: ' || l_snippet.get_string('source'); | |
p_query_result := p_query_result || apex_application.LF || apex_application.LF; | |
p_query_result := p_query_result || l_snippet.get_string('text'); | |
p_query_result := p_query_result || apex_application.LF || apex_application.LF; | |
end loop; | |
end loop; | |
end query; | |
procedure manage_document( | |
p_row_status in varchar2 | |
,p_id in number | |
,p_source in varchar2 | |
,p_text in clob | |
,p_batch_size in number | |
,p_canopy_url in varchar2 | |
,p_response out clob | |
) | |
as | |
l_request json_object_t; | |
l_request_clob clob; | |
l_documents json_array_t; | |
l_document json_object_t; | |
l_metadata json_object_t := json_object_t(); | |
l_response clob; | |
e_call_api_failed exception; | |
begin | |
case | |
when p_row_status in ('C','U') then | |
l_request := json_object_t(); | |
l_documents := json_array_t(); | |
l_document := json_object_t(); | |
l_document.put('id', p_id); | |
l_document.put('text', p_text); | |
l_document.put('source', p_source); | |
l_document.put('metadata', l_metadata); | |
l_documents.append(l_document); | |
l_request.put('documents', l_documents); | |
l_request.put('batch_size', p_batch_size); | |
-- | |
l_request_clob := l_request.to_clob(); | |
l_response := apex_web_service.make_rest_request( | |
p_url => p_canopy_url || '/context/upsert' | |
,p_http_method => 'POST' | |
,p_body => l_request_clob | |
); | |
if apex_web_service.g_status_code <> 200 then | |
raise e_call_api_failed; | |
end if; | |
-- response should be "Success" | |
when p_row_status = 'D' then | |
l_request := json_object_t(); | |
l_documents := json_array_t(); | |
l_documents.append(p_id); | |
l_request.put('document_ids', l_documents); | |
-- | |
l_request_clob := l_request.to_clob(); | |
l_response := apex_web_service.make_rest_request( | |
p_url => p_canopy_url || '/context/delete' | |
,p_http_method => 'POST' | |
,p_body => l_request_clob | |
); | |
if apex_web_service.g_status_code <> 200 then | |
raise e_call_api_failed; | |
end if; | |
end case; | |
end manage_document; | |
end utl_canopy_api; | |
/ |
declare
l_response clob;
begin
utl_canopy_api.manage_document(
p_row_status => :APEX$ROW_STATUS
,p_id => :P2_ID
,p_source => :P2_SOURCE
,p_text => :P2_TEXT
,p_canopy_url => :G_CANOPY_URL
,p_response => l_response
);
end;