|
17 | 17 | # [START generativeaionvertexai_embedding_code_retrieval] |
18 | 18 | from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel |
19 | 19 |
|
# Model identifier and embedding dimensionality for this sample.
# These are the same values embed_text() uses as its parameter defaults.
MODEL_NAME = "gemini-embedding-001"
DIMENSIONALITY = 3072
22 | 22 |
|
23 | 23 |
|
24 | 24 | def embed_text( |
25 | 25 | texts: list[str] = ["Retrieve a function that adds two numbers"], |
26 | 26 | task: str = "CODE_RETRIEVAL_QUERY", |
27 | | - model_name: str = "text-embedding-005", |
28 | | - dimensionality: int | None = 256, |
| 27 | + model_name: str = "gemini-embedding-001", |
| 28 | + dimensionality: int | None = 3072, |
29 | 29 | ) -> list[list[float]]: |
30 | 30 | """Embeds texts with a pre-trained, foundational model.""" |
31 | 31 | model = TextEmbeddingModel.from_pretrained(model_name) |
32 | | - inputs = [TextEmbeddingInput(text, task) for text in texts] |
33 | 32 | kwargs = dict(output_dimensionality=dimensionality) if dimensionality else {} |
34 | | - embeddings = model.get_embeddings(inputs, **kwargs) |
35 | | - # Example response: |
36 | | - # [[0.025890009477734566, -0.05553026497364044, 0.006374752148985863,...], |
37 | | - return [embedding.values for embedding in embeddings] |
| 33 | + |
| 34 | + embeddings = [] |
| 35 | + # gemini-embedding-001 takes one input at a time |
| 36 | + for text in texts: |
| 37 | + text_input = TextEmbeddingInput(text, task) |
| 38 | + embedding = model.get_embeddings([text_input], **kwargs) |
| 39 | + print(embedding) |
| 40 | + # Example response: |
| 41 | + # [[0.006135190837085247, -0.01462465338408947, 0.004978656303137541, ...]] |
| 42 | + embeddings.append(embedding[0].values) |
| 43 | + |
| 44 | + return embeddings |
38 | 45 |
|
39 | 46 |
|
40 | 47 | if __name__ == "__main__": |
|
0 commit comments