Skip to content

Commit

Permalink
Python: improved text_search folder testing (#9984)
Browse files Browse the repository at this point in the history
### Motivation and Context

<!-- Thank you for your contribution to the semantic-kernel repo!
Please help reviewers and future users, providing the following
information:
  1. Why is this change required?
  2. What problem does it solve?
  3. What scenario does it contribute to?
  4. If it fixes an open issue, please link to the issue here.
-->
Added unit tests to increase test coverage across the data folder.
Also reworks the exceptions for all vector-store functionality.

This does introduce some breaking changes in the exceptions raised by
the different vector-store methods; however, these APIs are still marked
experimental, and this change is an important step toward release.

Closes #9485 

### Description

<!-- Describe your changes, the overall approach, the underlying design.
These notes will help understanding how your code works. Thanks! -->

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [ ] I didn't break anyone 😄
  • Loading branch information
eavanvalkenburg authored Dec 19, 2024
1 parent 36701a2 commit 3bab848
Show file tree
Hide file tree
Showing 60 changed files with 1,439 additions and 960 deletions.
7 changes: 1 addition & 6 deletions python/.vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,6 @@
"locale": "en-US"
}
],
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"[python]": {
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit",
Expand All @@ -29,7 +24,7 @@
"notebook.formatOnSave.enabled": true,
"notebook.codeActionsOnSave": {
"source.fixAll": true,
"source.organizeImports": true
"source.organizeImports": false
},
"python.analysis.extraPaths": [
"${workspaceFolder}/samples/learn_resources"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,11 @@
from semantic_kernel.data.vector_search.vector_search_result import VectorSearchResult
from semantic_kernel.data.vector_search.vector_text_search import VectorTextSearchMixin
from semantic_kernel.data.vector_search.vectorized_search import VectorizedSearchMixin
from semantic_kernel.exceptions import MemoryConnectorException, MemoryConnectorInitializationError
from semantic_kernel.exceptions import (
VectorSearchExecutionException,
VectorStoreInitializationException,
VectorStoreOperationException,
)
from semantic_kernel.utils.experimental_decorator import experimental_class

logger: logging.Logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -91,7 +95,7 @@ def __init__(
if not collection_name:
collection_name = search_client._index_name
elif search_client._index_name != collection_name:
raise MemoryConnectorInitializationError(
raise VectorStoreInitializationException(
"Search client and search index client have different index names."
)
super().__init__(
Expand All @@ -107,7 +111,7 @@ def __init__(

if search_index_client:
if not collection_name:
raise MemoryConnectorInitializationError("Collection name is required.")
raise VectorStoreInitializationException("Collection name is required.")
super().__init__(
data_model_type=data_model_type,
data_model_definition=data_model_definition,
Expand All @@ -133,14 +137,14 @@ def __init__(
index_name=collection_name,
)
except ValidationError as exc:
raise MemoryConnectorInitializationError("Failed to create Azure Cognitive Search settings.") from exc
raise VectorStoreInitializationException("Failed to create Azure Cognitive Search settings.") from exc
search_index_client = get_search_index_client(
azure_ai_search_settings=azure_ai_search_settings,
azure_credential=kwargs.get("azure_credentials"),
token_credential=kwargs.get("token_credentials"),
)
if not azure_ai_search_settings.index_name:
raise MemoryConnectorInitializationError("Collection name is required.")
raise VectorStoreInitializationException("Collection name is required.")

super().__init__(
data_model_type=data_model_type,
Expand Down Expand Up @@ -211,7 +215,7 @@ async def create_collection(self, **kwargs) -> None:
if isinstance(index, SearchIndex):
await self.search_index_client.create_index(index=index, **kwargs)
return
raise MemoryConnectorException("Invalid index type supplied.")
raise VectorStoreOperationException("Invalid index type supplied, should be a SearchIndex object.")
await self.search_index_client.create_index(
index=data_model_definition_to_azure_ai_search_index(
collection_name=self.collection_name,
Expand Down Expand Up @@ -279,7 +283,10 @@ async def _inner_search(
for name, field in self.data_model_definition.fields.items()
if not isinstance(field, VectorStoreRecordVectorField)
]
raw_results = await self.search_client.search(**search_args)
try:
raw_results = await self.search_client.search(**search_args)
except Exception as exc:
raise VectorSearchExecutionException("Failed to search the collection.") from exc
return KernelSearchResults(
results=self._get_vector_search_results_from_results(raw_results, options),
total_count=await raw_results.get_count() if options.include_total_count else None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from semantic_kernel.connectors.memory.azure_ai_search.utils import get_search_client, get_search_index_client
from semantic_kernel.data.record_definition import VectorStoreRecordDefinition
from semantic_kernel.data.vector_storage import VectorStore
from semantic_kernel.exceptions import MemoryConnectorInitializationError
from semantic_kernel.exceptions import VectorStoreInitializationException
from semantic_kernel.utils.experimental_decorator import experimental_class

if TYPE_CHECKING:
Expand Down Expand Up @@ -78,7 +78,7 @@ def __init__(
env_file_encoding=env_file_encoding,
)
except ValidationError as exc:
raise MemoryConnectorInitializationError("Failed to create Azure AI Search settings.") from exc
raise VectorStoreInitializationException("Failed to create Azure AI Search settings.") from exc
search_index_client = get_search_index_client(
azure_ai_search_settings=azure_ai_search_settings,
azure_credential=azure_credentials,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

from semantic_kernel.connectors.memory.azure_cosmos_db.azure_cosmos_db_no_sql_settings import AzureCosmosDBNoSQLSettings
from semantic_kernel.connectors.memory.azure_cosmos_db.utils import CosmosClientWrapper
from semantic_kernel.exceptions.memory_connector_exceptions import (
MemoryConnectorInitializationError,
MemoryConnectorResourceNotFound,
from semantic_kernel.exceptions import (
VectorStoreInitializationException,
VectorStoreOperationException,
)
from semantic_kernel.kernel_pydantic import KernelBaseModel
from semantic_kernel.utils.authentication.async_default_azure_credential_wrapper import (
Expand Down Expand Up @@ -63,10 +63,10 @@ def __init__(
env_file_encoding=env_file_encoding,
)
except ValidationError as e:
raise MemoryConnectorInitializationError("Failed to validate Azure Cosmos DB NoSQL settings.") from e
raise VectorStoreInitializationException("Failed to validate Azure Cosmos DB NoSQL settings.") from e

if cosmos_db_nosql_settings.database_name is None:
raise MemoryConnectorInitializationError("The name of the Azure Cosmos DB NoSQL database is missing.")
raise VectorStoreInitializationException("The name of the Azure Cosmos DB NoSQL database is missing.")

if cosmos_client is None:
if cosmos_db_nosql_settings.key is not None:
Expand Down Expand Up @@ -94,7 +94,7 @@ async def _does_database_exist(self) -> bool:
except CosmosResourceNotFoundError:
return False
except Exception as e:
raise MemoryConnectorResourceNotFound(
raise VectorStoreOperationException(
f"Failed to check if database '{self.database_name}' exists, with message {e}"
) from e

Expand All @@ -106,14 +106,14 @@ async def _get_database_proxy(self, **kwargs) -> DatabaseProxy:

if self.create_database:
return await self.cosmos_client.create_database(self.database_name, **kwargs)
raise MemoryConnectorResourceNotFound(f"Database '{self.database_name}' does not exist.")
raise VectorStoreOperationException(f"Database '{self.database_name}' does not exist.")
except Exception as e:
raise MemoryConnectorResourceNotFound(f"Failed to get database proxy for '{id}'.") from e
raise VectorStoreOperationException(f"Failed to get database proxy for '{id}'.") from e

async def _get_container_proxy(self, container_name: str, **kwargs) -> ContainerProxy:
    """Return a proxy for the named container.

    Resolves the database proxy first, then obtains the container client
    from it. Any failure along the way (including a missing database) is
    wrapped in a VectorStoreOperationException so callers see a single,
    uniform error type.
    """
    try:
        db_proxy = await self._get_database_proxy(**kwargs)
        return db_proxy.get_container_client(container_name)
    except Exception as exc:
        raise VectorStoreOperationException(f"Failed to get container proxy for '{container_name}'.") from exc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from typing_extensions import override # pragma: no cover

from azure.cosmos.aio import CosmosClient
from azure.cosmos.exceptions import CosmosBatchOperationError, CosmosHttpResponseError, CosmosResourceNotFoundError
from azure.cosmos.exceptions import CosmosHttpResponseError
from azure.cosmos.partition_key import PartitionKey

from semantic_kernel.connectors.memory.azure_cosmos_db.azure_cosmos_db_no_sql_base import AzureCosmosDBNoSQLBase
Expand All @@ -36,10 +36,10 @@
from semantic_kernel.data.vector_search.vector_search_result import VectorSearchResult
from semantic_kernel.data.vector_search.vector_text_search import VectorTextSearchMixin
from semantic_kernel.data.vector_search.vectorized_search import VectorizedSearchMixin
from semantic_kernel.exceptions.memory_connector_exceptions import (
MemoryConnectorException,
MemoryConnectorResourceNotFound,
from semantic_kernel.exceptions import (
VectorSearchExecutionException,
VectorStoreModelDeserializationException,
VectorStoreOperationException,
)
from semantic_kernel.kernel_types import OneOrMany
from semantic_kernel.utils.experimental_decorator import experimental_class
Expand Down Expand Up @@ -120,15 +120,8 @@ async def _inner_upsert(
**kwargs: Any,
) -> Sequence[TKey]:
container_proxy = await self._get_container_proxy(self.collection_name, **kwargs)
try:
results = await asyncio.gather(*(container_proxy.upsert_item(record) for record in records))
return [result[COSMOS_ITEM_ID_PROPERTY_NAME] for result in results]
except CosmosResourceNotFoundError as e:
raise MemoryConnectorResourceNotFound(
"The collection does not exist yet. Create the collection first."
) from e
except (CosmosBatchOperationError, CosmosHttpResponseError) as e:
raise MemoryConnectorException("Failed to upsert items.") from e
results = await asyncio.gather(*(container_proxy.upsert_item(record) for record in records))
return [result[COSMOS_ITEM_ID_PROPERTY_NAME] for result in results]

@override
async def _inner_get(self, keys: Sequence[TKey], **kwargs: Any) -> OneOrMany[Any] | None:
Expand All @@ -140,14 +133,7 @@ async def _inner_get(self, keys: Sequence[TKey], **kwargs: Any) -> OneOrMany[Any
parameters: list[dict[str, Any]] = [{"name": f"@id{i}", "value": get_key(key)} for i, key in enumerate(keys)]

container_proxy = await self._get_container_proxy(self.collection_name, **kwargs)
try:
return [item async for item in container_proxy.query_items(query=query, parameters=parameters)]
except CosmosResourceNotFoundError as e:
raise MemoryConnectorResourceNotFound(
"The collection does not exist yet. Create the collection first."
) from e
except Exception as e:
raise MemoryConnectorException("Failed to read items.") from e
return [item async for item in container_proxy.query_items(query=query, parameters=parameters)]

@override
async def _inner_delete(self, keys: Sequence[TKey], **kwargs: Any) -> None:
Expand All @@ -158,7 +144,7 @@ async def _inner_delete(self, keys: Sequence[TKey], **kwargs: Any) -> None:
)
exceptions = [result for result in results if isinstance(result, Exception)]
if exceptions:
raise MemoryConnectorException("Failed to delete item(s).", exceptions)
raise VectorStoreOperationException("Failed to delete item(s).", exceptions)

@override
async def _inner_search(
Expand All @@ -177,12 +163,12 @@ async def _inner_search(
query = self._build_vector_query(options)
params.append({"name": "@vector", "value": vector})
else:
raise ValueError("Either search_text or vector must be provided.")
raise VectorSearchExecutionException("Either search_text or vector must be provided.")
container_proxy = await self._get_container_proxy(self.collection_name, **kwargs)
try:
results = container_proxy.query_items(query, parameters=params)
except Exception as e:
raise MemoryConnectorException("Failed to search items.") from e
except Exception as exc:
raise VectorSearchExecutionException("Failed to search items.") from exc
return KernelSearchResults(
results=self._get_vector_search_results_from_results(results, options),
total_count=None,
Expand Down Expand Up @@ -286,38 +272,38 @@ def _deserialize_store_models_to_dicts(self, records: Sequence[Any], **kwargs: A

@override
async def create_collection(self, **kwargs) -> None:
indexing_policy = kwargs.pop("indexing_policy", create_default_indexing_policy(self.data_model_definition))
vector_embedding_policy = kwargs.pop(
"vector_embedding_policy", create_default_vector_embedding_policy(self.data_model_definition)
)
database_proxy = await self._get_database_proxy(**kwargs)
try:
database_proxy = await self._get_database_proxy(**kwargs)
await database_proxy.create_container_if_not_exists(
id=self.collection_name,
partition_key=self.partition_key,
indexing_policy=kwargs.pop(
"indexing_policy", create_default_indexing_policy(self.data_model_definition)
),
vector_embedding_policy=kwargs.pop(
"vector_embedding_policy", create_default_vector_embedding_policy(self.data_model_definition)
),
indexing_policy=indexing_policy,
vector_embedding_policy=vector_embedding_policy,
**kwargs,
)
except CosmosHttpResponseError as e:
raise MemoryConnectorException("Failed to create container.") from e
raise VectorStoreOperationException("Failed to create container.") from e

@override
async def does_collection_exist(self, **kwargs) -> bool:
container_proxy = await self._get_container_proxy(self.collection_name, **kwargs)
try:
container_proxy = await self._get_container_proxy(self.collection_name, **kwargs)
await container_proxy.read(**kwargs)
return True
except CosmosHttpResponseError:
return False

@override
async def delete_collection(self, **kwargs) -> None:
database_proxy = await self._get_database_proxy(**kwargs)
try:
database_proxy = await self._get_database_proxy(**kwargs)
await database_proxy.delete_container(self.collection_name)
except CosmosHttpResponseError as e:
raise MemoryConnectorException("Container could not be deleted.") from e
except Exception as e:
raise VectorStoreOperationException("Container could not be deleted.") from e

@override
async def __aexit__(self, exc_type, exc_value, traceback) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from semantic_kernel.data.record_definition.vector_store_model_definition import VectorStoreRecordDefinition
from semantic_kernel.data.vector_storage.vector_store import VectorStore
from semantic_kernel.data.vector_storage.vector_store_record_collection import VectorStoreRecordCollection
from semantic_kernel.exceptions.memory_connector_exceptions import MemoryConnectorException
from semantic_kernel.exceptions import VectorStoreOperationException
from semantic_kernel.utils.experimental_decorator import experimental_class

TModel = TypeVar("TModel")
Expand Down Expand Up @@ -93,7 +93,7 @@ async def list_collection_names(self, **kwargs) -> Sequence[str]:
containers = database.list_containers()
return [container["id"] async for container in containers]
except Exception as e:
raise MemoryConnectorException("Failed to list collection names.") from e
raise VectorStoreOperationException("Failed to list collection names.") from e

async def __aexit__(self, exc_type, exc_value, traceback) -> None:
"""Exit the context manager."""
Expand Down
Loading

0 comments on commit 3bab848

Please sign in to comment.