- The token weights that BAAI/bge-m3 returns as lexical_weights for a given text are converted into the format Oracle Database can store as a sparse vector, [dimension count, [array of indices in ascending order], [values corresponding to those indices]], and returned as sparse_vecs (a standalone sketch of this conversion follows this list).
- BAAI/bge-m3 recognizes 250,002 tokens, but the maximum number of dimensions Oracle Database's vector type supports is 65,535, so the vocabulary actually handled is capped at 65,535 tokens.
- Dense vectors are returned unchanged as dense_vecs.
- Token indices are persisted to global_vocab_mapping.json so that the index assigned to a given token does not change from request to request.
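Before the full service code, here is a minimal standalone sketch of that conversion; the token IDs, weights, and mapping below are made-up illustrative values:

# Minimal sketch of the lexical_weights -> Oracle sparse-text conversion
# described above; token IDs, weights, and the mapping are made-up values.
MAX_VOCAB_SIZE = 65535
PRECISION = 4

# Hypothetical persisted mapping: bge-m3 token ID (string) -> fixed position
global_vocab_mapping = {"6": 0, "3293": 1, "87497": 2}

# Hypothetical lexical_weights entry for one text
lexical_weights = {"87497": 0.3589, "6": 0.2678, "3293": 0.2076}

pairs = sorted(
    (global_vocab_mapping[t], w)
    for t, w in lexical_weights.items()
    if t in global_vocab_mapping
)  # Oracle expects the index array in ascending order
indices = ','.join(str(i) for i, _ in pairs)
values = ','.join(f"{w:.{PRECISION}f}" for _, w in pairs)
print(f"[{MAX_VOCAB_SIZE}, [{indices}], [{values}]]")
# -> [65535, [0,1,2], [0.2678,0.2076,0.3589]]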
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, validator
from typing import List, Dict, Optional
from FlagEmbedding import BGEM3FlagModel
import logging
import json
import os

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration
MAX_TEXTS = 100
PRECISION = 4
MAX_VOCAB_SIZE = 65535  # Oracle 23ai limit
VOCAB_DICT_PATH = "global_vocab_mapping.json"  # Persistent vocabulary file

# Model loading
try:
    model = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)
    original_vocab_size = model.tokenizer.vocab_size
    logger.info(f"Original vocab size: {original_vocab_size}")
except Exception as e:
    logger.error(f"Failed to load model: {e}")
    raise

# Global vocabulary dictionary (persistent across requests)
global_vocab_mapping: Dict[str, int] = {}
current_vocab_size = 0

def load_global_vocabulary():
    """Load global vocabulary mapping from file"""
    global global_vocab_mapping, current_vocab_size
    if os.path.exists(VOCAB_DICT_PATH):
        try:
            with open(VOCAB_DICT_PATH, 'r', encoding='utf-8') as f:
                data = json.load(f)
            global_vocab_mapping = {str(k): int(v) for k, v in data['mapping'].items()}
            current_vocab_size = data['size']
            logger.info(f"Loaded global vocabulary: {current_vocab_size} tokens")
        except Exception as e:
            logger.error(f"Failed to load vocabulary: {e}")
            global_vocab_mapping = {}
            current_vocab_size = 0
    else:
        logger.info("No existing vocabulary file found, starting fresh")

def save_global_vocabulary():
    """Save global vocabulary mapping to file"""
    try:
        data = {
            'mapping': global_vocab_mapping,
            'size': current_vocab_size
        }
        with open(VOCAB_DICT_PATH, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        logger.info(f"Saved global vocabulary: {current_vocab_size} tokens")
    except Exception as e:
        logger.error(f"Failed to save vocabulary: {e}")

def update_global_vocabulary(new_tokens: List[str]):
    """Update global vocabulary with new tokens"""
    global global_vocab_mapping, current_vocab_size
    added_count = 0
    for token in new_tokens:
        token_str = str(token)
        if token_str not in global_vocab_mapping and current_vocab_size < MAX_VOCAB_SIZE:
            global_vocab_mapping[token_str] = current_vocab_size
            current_vocab_size += 1
            added_count += 1
    if added_count > 0:
        save_global_vocabulary()
        logger.info(f"Added {added_count} new tokens to global vocabulary")
    return added_count

def get_top_tokens_from_batch(lexical_weights: List[Dict], top_k: int = 1000):
    """Get top K tokens from current batch"""
    token_scores = {}
    # Collect all tokens with their max scores
    for weights_dict in lexical_weights:
        for token_id, weight in weights_dict.items():
            token_str = str(token_id)
            if token_str not in token_scores or weight > token_scores[token_str]:
                token_scores[token_str] = weight
    # Sort by score and return top K
    sorted_tokens = sorted(token_scores.items(), key=lambda x: x[1], reverse=True)
    top_tokens = [token for token, _ in sorted_tokens[:top_k]]
    return top_tokens

def dense_to_text(dense_vecs, precision=PRECISION):
    """Convert dense_vecs to string representation"""
    text_representations = []
    for vec in dense_vecs:
        vec_str = '[' + ','.join([f"{val:.{precision}f}" for val in vec]) + ']'
        text_representations.append(vec_str)
    return text_representations

def lexical_to_sparse_format_fixed(lexical_weights: List[Dict]):
    """Convert lexical_weights to sparse format using fixed global vocabulary"""
    sparse_representations = []
    for weights_dict in lexical_weights:
        if not weights_dict:
            # Empty sparse vector
            sparse_representations.append(f"[{MAX_VOCAB_SIZE}, [], []]")
            continue
        # Map tokens to global vocabulary positions
        mapped_tokens = []
        mapped_weights = []
        for token_id, weight in weights_dict.items():
            token_str = str(token_id)
            if token_str in global_vocab_mapping:
                global_pos = global_vocab_mapping[token_str]
                mapped_tokens.append(global_pos)
                mapped_weights.append(weight)
        if not mapped_tokens:
            sparse_representations.append(f"[{MAX_VOCAB_SIZE}, [], []]")
            continue
        try:
            # Sort by index position (ascending order) instead of weight
            paired_data = list(zip(mapped_tokens, mapped_weights))
            paired_data.sort(key=lambda x: x[0])  # Sort by index (x[0]) instead of weight (x[1])
            sorted_positions, sorted_weights = zip(*paired_data) if paired_data else ([], [])
            # Format indices array
            indices_str = '[' + ','.join(map(str, sorted_positions)) + ']'
            # Format values array with specified precision
            values_str = '[' + ','.join([f"{weight:.{PRECISION}f}" for weight in sorted_weights]) + ']'
            # Create sparse vector format: [vocab_size, [indices], [values]]
            sparse_format = f"[{MAX_VOCAB_SIZE}, {indices_str}, {values_str}]"
            sparse_representations.append(sparse_format)
        except Exception as e:
            logger.warning(f"Failed to convert lexical weights to sparse format: {e}")
            sparse_representations.append(f"[{MAX_VOCAB_SIZE}, [], []]")
    return sparse_representations

# Load global vocabulary on startup
load_global_vocabulary()

# FastAPI app definition
app = FastAPI(
    title="BGE-M3 Embedding API with Fixed Global Vocabulary",
    description="API for BGE-M3 text embeddings with persistent global vocabulary for Oracle 23ai",
    version="2.0.0"
)

# Input data structure
class TextInput(BaseModel):
    texts: List[str]
    update_vocabulary: bool = True  # Whether to update global vocab with new tokens
    top_k_per_batch: int = 1000  # Top K tokens to consider from current batch

    @validator('texts')
    def validate_texts(cls, v):
        if not v:
            raise ValueError("texts cannot be empty")
        if len(v) > MAX_TEXTS:
            raise ValueError(f"Too many texts. Maximum: {MAX_TEXTS}")
        return v

def embed(texts: List[str], update_vocabulary: bool = True, top_k_per_batch: int = 1000):
    """Embedding extraction function with fixed global vocabulary"""
    try:
        output = model.encode(
            texts,
            return_dense=True,
            return_sparse=True,
            return_colbert_vecs=False
        )
        dense = output["dense_vecs"]
        dense_text = dense_to_text(dense)
        lexical = output["lexical_weights"]
        # Update global vocabulary if requested
        if update_vocabulary:
            # Get top tokens from current batch
            top_tokens = get_top_tokens_from_batch(lexical, top_k_per_batch)
            added_count = update_global_vocabulary(top_tokens)
            logger.info(f"Vocabulary update: {added_count} new tokens added")
        # Convert to sparse format using fixed global vocabulary
        lexical_sparse = lexical_to_sparse_format_fixed(lexical)
        # Create individual results for each input text
        results = []
        for i in range(len(texts)):
            results.append({
                "dense_vecs": dense_text[i],
                "sparse_vecs": lexical_sparse[i],
                "vocab_size_used": MAX_VOCAB_SIZE,
                "global_vocab_size": current_vocab_size
            })
        logger.info(f"Processed {len(texts)} texts using global vocabulary ({current_vocab_size} tokens)")
        return results
    except Exception as e:
        logger.error(f"Embedding failed: {e}")
        raise

# Endpoint definitions
@app.get("/health")
def health_check():
    """Health check endpoint"""
    return {
        "status": "healthy",
        "model": "BAAI/bge-m3",
        "original_vocab_size": original_vocab_size,
        "global_vocab_size": current_vocab_size,
        "max_vocab_size": MAX_VOCAB_SIZE
    }

@app.get("/vocabulary/info")
def vocabulary_info():
    """Get global vocabulary information"""
    return {
        "total_tokens": current_vocab_size,
        "max_capacity": MAX_VOCAB_SIZE,
        "usage_percentage": round(current_vocab_size / MAX_VOCAB_SIZE * 100, 2),
        "vocab_file": VOCAB_DICT_PATH
    }

@app.get("/vocabulary/sample")
def vocabulary_sample(limit: int = 20):
    """Get sample tokens from global vocabulary"""
    sample_tokens = list(global_vocab_mapping.items())[:limit]
    return {
        "sample_tokens": sample_tokens,
        "total_count": len(global_vocab_mapping)
    }

@app.post("/vocabulary/reset")
def reset_vocabulary():
    """Reset global vocabulary (admin function)"""
    global global_vocab_mapping, current_vocab_size
    global_vocab_mapping = {}
    current_vocab_size = 0
    if os.path.exists(VOCAB_DICT_PATH):
        os.remove(VOCAB_DICT_PATH)
    return {
        "status": "success",
        "message": "Global vocabulary has been reset"
    }

@app.post("/embed")
def get_embeddings(input: TextInput):
    """Embedding extraction endpoint with fixed global vocabulary"""
    try:
        logger.info(f"Processing {len(input.texts)} texts")
        results = embed(input.texts, input.update_vocabulary, input.top_k_per_batch)
        return results
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7999)
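Once the server is running (the startup steps are shown below), the auxiliary endpoints can be smoke-tested from Python as well; a minimal sketch assuming the requests package is installed and the server listens on the default localhost:7999:

# Minimal smoke test of the auxiliary endpoints, assuming the server
# is reachable at localhost:7999 and requests is installed.
import requests

BASE = "http://localhost:7999"

print(requests.get(f"{BASE}/health").json())             # model name and vocab sizes
print(requests.get(f"{BASE}/vocabulary/info").json())    # total_tokens, usage_percentage
print(requests.get(f"{BASE}/vocabulary/sample").json())  # first tokens in the mapping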
create table ebaj_sample_texts (
    id number generated by default on null as identity
        constraint ebaj_sample_texts_id_pk primary key,
    text varchar2(4000 char),
    is_updated boolean default true,
    dvec vector,
    svec vector(65535, float32, sparse)
);
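To sanity-check the column definitions before wiring up the service, a row with hand-written vector literals can be inserted directly; a minimal sketch assuming python-oracledb, with placeholder connection settings:

# Minimal sketch: insert one row with hand-written vector literals.
# Assumes python-oracledb; user/password/dsn are placeholders.
import oracledb

conn = oracledb.connect(user="testuser", password="********", dsn="localhost/freepdb1")
with conn.cursor() as cur:
    cur.execute(
        """insert into ebaj_sample_texts (text, dvec, svec)
           values (:t, to_vector(:d), to_vector(:s, 65535, FLOAT32, SPARSE))""",
        t="hello",
        d="[0.1,0.2,0.3]",                    # dense: dvec has no fixed dimension
        s="[65535, [1,3], [0.5000,0.2500]]",  # sparse: [dim, [indices asc], [values]]
    )
conn.commit()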
% git clone https://github.com/ujnak/bge-m3-service
Cloning into 'bge-m3-service'...
remote: Enumerating objects: 22, done.
remote: Counting objects: 100% (22/22), done.
remote: Compressing objects: 100% (17/17), done.
remote: Total 22 (delta 6), reused 6 (delta 2), pack-reused 0 (from 0)
Receiving objects: 100% (22/22), 9.72 KiB | 9.72 MiB/s, done.
Resolving deltas: 100% (6/6), done.
% cd bge-m3-service
bge-m3-service %
bge-m3-service % python3.12 -m venv bge-m3
bge-m3-service % . bge-m3/bin/activate
(bge-m3) bge-m3-service %
(bge-m3) bge-m3-service % pip install torch transformers fastapi uvicorn FlagEmbedding
Collecting torch
Using cached torch-2.7.0-cp312-none-macosx_11_0_arm64.whl.metadata (29 kB)
Collecting transformers
Using cached transformers-4.52.3-py3-none-any.whl.metadata (40 kB)
Collecting fastapi
Using cached fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
Using cached uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)
Collecting FlagEmbedding
[snip]
Using cached six-1.17.0-py2.py3-none-any.whl (11 kB)
Using cached yarl-1.20.0-cp312-cp312-macosx_11_0_arm64.whl (95 kB)
Installing collected packages: warc3-wet-clueweb09, warc3-wet, sentencepiece, pytz, mpmath, cbor, zlib-state, xxhash, urllib3, unlzw3, tzdata, typing-extensions, tqdm, threadpoolctl, sympy, soupsieve, sniffio, six, setuptools, safetensors, regex, pyyaml, pyarrow, psutil, protobuf, propcache, Pillow, packaging, numpy, networkx, multidict, MarkupSafe, lz4, lxml, joblib, ijson, idna, hf-xet, h11, fsspec, frozenlist, filelock, dill, click, charset-normalizer, certifi, attrs, annotated-types, aiohappyeyeballs, yarl, uvicorn, typing-inspection, trec-car-tools, scipy, requests, python-dateutil, pydantic-core, multiprocess, jinja2, beautifulsoup4, anyio, aiosignal, torch, starlette, scikit-learn, pydantic, pandas, inscriptis, huggingface-hub, aiohttp, tokenizers, ir-datasets, fastapi, accelerate, transformers, datasets, sentence_transformers, peft, FlagEmbedding
Successfully installed FlagEmbedding-1.3.5 MarkupSafe-3.0.2 Pillow-11.2.1 accelerate-1.7.0 aiohappyeyeballs-2.6.1 aiohttp-3.12.4 aiosignal-1.3.2 annotated-types-0.7.0 anyio-4.9.0 attrs-25.3.0 beautifulsoup4-4.13.4 cbor-1.0.0 certifi-2025.4.26 charset-normalizer-3.4.2 click-8.2.1 datasets-3.6.0 dill-0.3.8 fastapi-0.115.12 filelock-3.18.0 frozenlist-1.6.0 fsspec-2025.3.0 h11-0.16.0 hf-xet-1.1.2 huggingface-hub-0.32.2 idna-3.10 ijson-3.4.0 inscriptis-2.6.0 ir-datasets-0.5.10 jinja2-3.1.6 joblib-1.5.1 lxml-5.4.0 lz4-4.4.4 mpmath-1.3.0 multidict-6.4.4 multiprocess-0.70.16 networkx-3.5 numpy-2.2.6 packaging-25.0 pandas-2.2.3 peft-0.15.2 propcache-0.3.1 protobuf-6.31.1 psutil-7.0.0 pyarrow-20.0.0 pydantic-2.11.5 pydantic-core-2.33.2 python-dateutil-2.9.0.post0 pytz-2025.2 pyyaml-6.0.2 regex-2024.11.6 requests-2.32.3 safetensors-0.5.3 scikit-learn-1.6.1 scipy-1.15.3 sentence_transformers-4.1.0 sentencepiece-0.2.0 setuptools-80.9.0 six-1.17.0 sniffio-1.3.1 soupsieve-2.7 starlette-0.46.2 sympy-1.14.0 threadpoolctl-3.6.0 tokenizers-0.21.1 torch-2.7.0 tqdm-4.67.1 transformers-4.52.3 trec-car-tools-2.6 typing-extensions-4.13.2 typing-inspection-0.4.1 tzdata-2025.2 unlzw3-0.2.3 urllib3-2.4.0 uvicorn-0.34.2 warc3-wet-0.2.5 warc3-wet-clueweb09-0.2.5 xxhash-3.5.0 yarl-1.20.0 zlib-state-0.1.9
[notice] A new release of pip is available: 25.0 -> 25.1.1
[notice] To update, run: pip install --upgrade pip
(bge-m3) bge-m3-service %
python -m uvicorn embed_service:app --host 0.0.0.0 --port 7999
(bge-m3) bge-m3-service % python -m uvicorn embed_service:app --host 0.0.0.0 --port 7999
Fetching 30 files: 100%|█████████████████████████████████████████████| 30/30 [00:00<00:00, 37449.14it/s]
INFO:FlagEmbedding.finetune.embedder.encoder_only.m3.runner:loading existing colbert_linear and sparse_linear---------
INFO:embed_service:Original vocab size: 250002
INFO:embed_service:No existing vocabulary file found, starting fresh
INFO: Started server process [50730]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: Uvicorn running on http://0.0.0.0:7999 (Press CTRL+C to quit)
Test the API server from another terminal by running curl-embed.sh, which is located under bge-m3-service.
bge-m3-service % sh curl-embed.sh
[{"dense_vecs":"[-0.0255,0.0173,-0.0375,-0.0133,0.0063,-0.0039,0.0450,0.0021,0.0331,-0.0027,-0.0379,0.0154,0.0147,-0.0050,0.0204,-0.0366,0.0550,-0.0374,0.0105,-0.0331,-0.0450,-0.0063,0.0285,0.0007,-0.0388,0.0300,-0.0045,0.0000,0.0279,0.0167,0.0259,-0.0350,0.0265,-0.0613,-0.0640,-0.0265,-0.0049,-0.0115,-0.0163,0.0206,0.0238,-0.0144,0.0145,-0.0296,-0.0195,0.0018,-0.0084,-0.0255,-0.0627,0.0074,0.0166,0.0108,0.0492,0.0045,0.0042,-0.0186,0.0340,-0.0161,-0.0818,0.0044,-0.0417,0.0245,-0.0016,0.0204,0.0018,0.1017,0.0242,0.0227,-0.0379,-0.0374,-0.0229,0.0200,-0.0221,0.0254,-0.0766,0.0153,0.0059,-0.0231,0.0127,-0.0023,0.0569,0.0035,-0.0140,-0.0096,-0.0198,0.0243,-0.0430,-0.0243,-0.0211,-0.0225,-0.0524,-0.0264,0.0189,-0.0241,-0.0456,0.0179,-0.0551,0.0323,-0.0023,-0.0112,0.0055,0.0312,0.0160,-0.0233,-0.0106,-0.0033,0.0331,0.0562,0.0396,-0.0053,0.0333,0.0254,0.0373,0.0060,0.0143,0.0007,-0.0164,0.0039,-0.0027,0.0135,0.0421,0.0587,0.0315,-0.0347,-0.0123,-0.0181,-0.0037,0.0150,0.0315,-0.0129,0.0170,0.0433,-0.0415,-0.0066,-0.0396,-0.0179,-0.0151,0.0403,-0.0104,-0.0419,0.0449,0.0170,-0.0127,0.0080,0.0468,-0.0452,0.0390,-0.0341,-0.0004,0.0017,-0.0266,0.0173,-0.0157,-0.0041,0.0045
[snip]
.0513,0.0150,-0.0204,0.0061,-0.0134,0.0130,-0.0779,0.0168,-0.0278,0.0134,-0.0294,0.0017,0.0172,0.0181,0.0314,-0.0123,0.0229,-0.0029,0.0466,0.0105,-0.0025,0.0192,0.0345,-0.0399,0.0104,0.0249,0.0032,0.0020,-0.0242,0.0219,-0.0086,-0.0558,-0.0021,-0.0184,-0.0048,-0.0218,-0.0332,-0.0001,0.0017,-0.0067,-0.0370,-0.0142,0.0523,-0.0179,0.0022,0.0572,-0.0072,0.0157,0.0148,-0.0072,0.0093,0.0372,-0.0104,0.0038,-0.0451,-0.0534,-0.0331,0.0259,0.0135,-0.0163,-0.0484,0.0035,0.0306,0.0150,-0.0190,-0.0087,0.0188,0.0075,0.0080,0.0271,0.0631,0.0171,0.0356,0.0160,0.0133,0.0228,0.0114,0.0104,-0.0215,0.0253,0.0087,0.0027,0.0205,0.0030,-0.0147,0.0014,0.0494,0.0320,0.0094,0.0451,-0.0006,0.0057,-0.0065,-0.0080,-0.0045,-0.0446,-0.0305,0.0051,0.0176,0.0033,-0.0374,0.0004,-0.0307,-0.0268,-0.0089,0.0405,-0.0035,0.0190,-0.0016,0.0068,-0.0053,0.0191,-0.0213,0.0217,0.0030,-0.0163,0.0152,-0.0079,-0.0696,0.0155,0.0108,0.0184,-0.0247,0.0072,-0.0284,0.0532,0.0005,-0.0145,0.0621,0.0300,-0.0178,-0.0079,0.0271,-0.0044,-0.0003,0.0186]","sparse_vecs":"[65535, [1,3,5,6,7], [0.3589,0.2678,0.2279,0.2076,0.2061]]","vocab_size_used":65535,"global_vocab_size":9}]
bge-m3-service %
The following lines are additionally printed in the API server log.
INFO:embed_service:Processing 2 texts
You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
INFO:embed_service:Saved global vocabulary: 9 tokens
INFO:embed_service:Added 9 new tokens to global vocabulary
INFO:embed_service:Vocabulary update: 9 new tokens added
INFO:embed_service:Processed 2 texts using global vocabulary (9 tokens)
INFO: 127.0.0.1:54784 - "POST /embed HTTP/1.1" 200 OK
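For reference, roughly the same request that curl-embed.sh issues can also be sent from Python; a minimal sketch with two made-up sample texts whose field names match the TextInput model above:

# Roughly what curl-embed.sh sends, expressed in Python; the two sample
# texts are made up, and the payload shape matches the TextInput model.
import requests

payload = {
    "texts": ["This is a sample sentence.", "これはサンプルの文章です。"],
    "update_vocabulary": True,
    "top_k_per_batch": 1000,
}
resp = requests.post("http://localhost:7999/embed", json=payload)
resp.raise_for_status()
for item in resp.json():
    print(item["sparse_vecs"], item["global_vocab_size"])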
Press CTRL+C to stop the API server.
(bge-m3) bge-m3-service % podman build --file Dockerfile --tag bge-m3-service .
STEP 1/5: FROM python:3.12
STEP 2/5: WORKDIR /app
--> 2bc20267de12
STEP 3/5: COPY embed_service.py .
--> 22335c7b57ea
STEP 4/5: RUN pip install torch transformers fastapi uvicorn FlagEmbedding
Collecting torch
Downloading torch-2.7.0-cp312-cp312-manylinux_2_28_aarch64.whl.metadata (29 kB)
Collecting transformers
Downloading transformers-4.52.3-py3-none-any.whl.metadata (40 kB)
Collecting fastapi
Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)
Collecting FlagEmbedding
[snip]
Successfully installed FlagEmbedding-1.3.5 MarkupSafe-3.0.2 Pillow-11.2.1 accelerate-1.7.0 aiohappyeyeballs-2.6.1 aiohttp-3.12.4 aiosignal-1.3.2 annotated-types-0.7.0 anyio-4.9.0 attrs-25.3.0 beautifulsoup4-4.13.4 cbor-1.0.0 certifi-2025.4.26 charset-normalizer-3.4.2 click-8.2.1 datasets-3.6.0 dill-0.3.8 fastapi-0.115.12 filelock-3.18.0 frozenlist-1.6.0 fsspec-2025.3.0 h11-0.16.0 hf-xet-1.1.2 huggingface-hub-0.32.3 idna-3.10 ijson-3.4.0 inscriptis-2.6.0 ir-datasets-0.5.10 jinja2-3.1.6 joblib-1.5.1 lxml-5.4.0 lz4-4.4.4 mpmath-1.3.0 multidict-6.4.4 multiprocess-0.70.16 networkx-3.5 numpy-2.2.6 packaging-25.0 pandas-2.2.3 peft-0.15.2 propcache-0.3.1 protobuf-6.31.1 psutil-7.0.0 pyarrow-20.0.0 pydantic-2.11.5 pydantic-core-2.33.2 python-dateutil-2.9.0.post0 pytz-2025.2 pyyaml-6.0.2 regex-2024.11.6 requests-2.32.3 safetensors-0.5.3 scikit-learn-1.6.1 scipy-1.15.3 sentence_transformers-4.1.0 sentencepiece-0.2.0 setuptools-80.9.0 six-1.17.0 sniffio-1.3.1 soupsieve-2.7 starlette-0.46.2 sympy-1.14.0 threadpoolctl-3.6.0 tokenizers-0.21.1 torch-2.7.0 tqdm-4.67.1 transformers-4.52.3 trec-car-tools-2.6 typing-extensions-4.13.2 typing-inspection-0.4.1 tzdata-2025.2 unlzw3-0.2.3 urllib3-2.4.0 uvicorn-0.34.2 warc3-wet-0.2.5 warc3-wet-clueweb09-0.2.5 xxhash-3.5.0 yarl-1.20.0 zlib-state-0.1.9
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.
[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: pip install --upgrade pip
--> e2603b9cfae7
STEP 5/5: CMD ["uvicorn", "embed_service:app", "--host", "0.0.0.0", "--port", "7999", "--log-level", "info", "--access-log"]
COMMIT bge-m3-service
--> 1036edc0e509
Successfully tagged localhost/bge-m3-service:latest
1036edc0e509ec62573210d9104dda4d2fb2447a33550256c6dd60432479428d
(bge-m3) bge-m3-service %
Confirm that the image bge-m3-service has been created.
(bge-m3) bge-m3-service % podman image ls bge-m3-service
REPOSITORY TAG IMAGE ID CREATED SIZE
localhost/bge-m3-service latest 1036edc0e509 31 seconds ago 2.62 GB
(bge-m3) bge-m3-service %
Create a container named bge-m3 from the newly built container image and run it.
% podman run -d --name bge-m3 -p 7999:7999 localhost/bge-m3-service
9546a4fbddfd6d4b0e7575fa917a09dabd4e12b1441fb6de581eff60f3a18695
(bge-m3) bge-m3-service %
Check the container logs. Here too, the first startup takes a while before the server is ready to accept requests.
(bge-m3) bge-m3-service % podman logs -f bge-m3
Fetching 30 files: 100%|██████████| 30/30 [01:20<00:00, 2.68s/it]
INFO:FlagEmbedding.finetune.embedder.encoder_only.m3.runner:loading existing colbert_linear and sparse_linear---------
INFO:embed_service:Original vocab size: 250002
INFO:embed_service:No existing vocabulary file found, starting fresh
INFO: Started server process [1]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: Uvicorn running on http://0.0.0.0:7999 (Press CTRL+C to quit)
bge-m3-service % sh curl-embed.sh
[{"dense_vecs":"[-0.0255,0.0174,-0.0374,-0.0134,0.0063,-0.0038,0.0450,0.0021,0.0331,-0.0028,-0.0379,0.0155,0.0147,-0.0050,0.0204,-0.0366,0.0550,-0.0375,0.0105,-0.0331,-0.0450,-0.0063,0.0284,0.0007,-0.0389,0.0300,-0.0046,0.0000,0.0279,0.0167,0.0259,-0.0351,0.0265,-0.0613,-0.0640,-0.0265,-0.0050,-0.0116,-0.0163,0.0206,0.0238,-0.0144,0.0145,-0.0297,-0.0194,0.0018,-0.0084,-0.0255,-0.0627,0.0074,0.0166,0.0108,0.0492,0.0046,0.0042,-0.0185,0.0340,-0.0160,-0.0819,0.0044,-0.0416,0.0245,-0.0016,0.0205,0.0018,0.1016,0.0242,0.0227,-0.0379,-0.0373,-0.0229,0.0200,-0.0221,0.0254,-0.0765,0.0153,0.0059,-0.0231,0.0127,-0.0023,0.0569,0.0034,-0.0140,-0.0096,-0.0198,0.0242,-0.0430,-0.0243,-0.0211,-0.0225,-0.0523,-0.0264,0.0188,-0.0242,-0.0456,0.0179,-0.0550,0.0323,-0.0022,-0.0113,0.0055,0.0312,0.0160,-0.0234,-0.0106,-0.0033,0.0331,0.0562,0.0397,-0.0053,0.0333,0.0254,0.0373,0.0060,0.0144,0.0007,-0.0164,0.0039,-0.0027,0.0136,0.0422,0.0587,0.0315,-0.0346,-0.0123,-0.0180,-0.0037,0.0150,0.0315,-0.0129,0.0170,0.0433,-0.0415,-0.0066,-0.0395,-0.0178,-0.0152,0.0404,-0.0104,-0.0419,0.0450,0.0170,-0.0127,0.0080,0.0467,-0.0452,0.0390,-0.0342,-0.0004,0.0017,-0.0266,0.0174,-0.0157,-0.0041,0.0045
[snip]
,0.0022,-0.0018,0.0008,-0.0287,0.0211,-0.0030,-0.0216,-0.0212,-0.0043,-0.0008,0.0116,0.0019,0.0274,-0.0148,0.0165,0.0183,-0.0073,0.0512,0.0150,-0.0203,0.0062,-0.0134,0.0131,-0.0779,0.0168,-0.0278,0.0134,-0.0294,0.0017,0.0172,0.0181,0.0314,-0.0122,0.0228,-0.0028,0.0466,0.0105,-0.0026,0.0192,0.0345,-0.0400,0.0104,0.0249,0.0032,0.0020,-0.0242,0.0219,-0.0086,-0.0558,-0.0021,-0.0185,-0.0048,-0.0218,-0.0332,-0.0001,0.0017,-0.0067,-0.0370,-0.0142,0.0523,-0.0179,0.0022,0.0572,-0.0072,0.0157,0.0148,-0.0072,0.0093,0.0372,-0.0104,0.0039,-0.0451,-0.0535,-0.0331,0.0259,0.0135,-0.0163,-0.0485,0.0035,0.0305,0.0150,-0.0190,-0.0087,0.0188,0.0075,0.0080,0.0271,0.0631,0.0171,0.0356,0.0160,0.0133,0.0228,0.0113,0.0103,-0.0215,0.0252,0.0087,0.0027,0.0206,0.0030,-0.0147,0.0014,0.0494,0.0320,0.0094,0.0452,-0.0006,0.0057,-0.0065,-0.0080,-0.0045,-0.0446,-0.0305,0.0051,0.0176,0.0033,-0.0374,0.0004,-0.0307,-0.0268,-0.0089,0.0405,-0.0036,0.0190,-0.0016,0.0068,-0.0052,0.0190,-0.0213,0.0217,0.0030,-0.0163,0.0151,-0.0079,-0.0696,0.0154,0.0107,0.0184,-0.0247,0.0072,-0.0284,0.0532,0.0005,-0.0145,0.0621,0.0300,-0.0178,-0.0078,0.0270,-0.0044,-0.0003,0.0185]","sparse_vecs":"[65535, [1,3,5,6,7], [0.3588,0.2678,0.2274,0.2076,0.2058]]","vocab_size_used":65535,"global_vocab_size":9}]
bge-m3-service %
declare
    l_request          json_object_t;
    l_texts_arr        json_array_t;
    l_request_clob     clob;
    l_response         clob;
    l_response_json    json_array_t;
    e_call_api_failed  exception;
    l_vec              json_object_t;
    l_dvec             clob;
    l_svec             clob;
    l_vocab_size_used  number;
begin
    apex_web_service.set_request_headers('Content-Type', 'application/json');
    for r in (select * from ebaj_sample_texts where is_updated)
    loop
        l_request := json_object_t();
        l_texts_arr := json_array_t();
        l_texts_arr.append(r.text);
        l_request.put('texts', l_texts_arr);
        l_request_clob := l_request.to_clob();
        l_response := apex_web_service.make_rest_request(
            p_url         => :G_ENDPOINT
           ,p_http_method => 'POST'
           ,p_body        => l_request_clob
        );
        if apex_web_service.g_status_code <> 200 then
            raise e_call_api_failed;
        end if;
        l_response_json := json_array_t(l_response);
        for i in 1..(l_response_json.get_size())
        loop
            l_vec := treat(l_response_json.get(i-1) as json_object_t);
            l_dvec := l_vec.get_clob('dense_vecs');
            l_svec := l_vec.get_clob('sparse_vecs');
            l_vocab_size_used := l_vec.get_number('vocab_size_used');
            update ebaj_sample_texts
               set dvec = to_vector(l_dvec),
                   svec = to_vector(l_svec, 65535, FLOAT32, SPARSE),
                   is_updated = false
             where id = r.id;
        end loop;
    end loop;
    commit;
end;
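Once dvec and svec are populated, the table can be searched with a similarity query. A minimal sketch of a dense-vector search from python-oracledb, where the query vector comes from the same /embed endpoint (connection settings are placeholders):

# Minimal sketch: similarity search over the stored dense vectors.
# Assumes python-oracledb and the embedding service above;
# user/password/dsn are placeholders.
import oracledb
import requests

qvec = requests.post(
    "http://localhost:7999/embed",
    json={"texts": ["query text"], "update_vocabulary": False},
).json()[0]["dense_vecs"]  # text form, e.g. "[-0.0255,0.0173,...]"

conn = oracledb.connect(user="testuser", password="********", dsn="localhost/freepdb1")
with conn.cursor() as cur:
    cur.execute(
        """select id, text
             from ebaj_sample_texts
            order by vector_distance(dvec, to_vector(:q), COSINE)
            fetch first 5 rows only""",
        q=qvec,
    )
    for row in cur:
        print(row)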
- You can’t do sparse-only queries.
- You can’t use integrated embedding and reranking.