-
Notifications
You must be signed in to change notification settings - Fork 2
feat: add AppwriteEmbeddingAdapter and corresponding tests; update do… #32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
4c6e7a7
feat: add AppwriteEmbeddingAdapter and corresponding tests; update do…
ArnabChatterjee20k b4d6240
fix: update docblock to make modelLoadingDuration optional in embed m…
ArnabChatterjee20k 367dfe7
simplified the name
ArnabChatterjee20k 90c9ebd
feat: update docker-compose for appwrite-embedding service and add Ap…
ArnabChatterjee20k deaf063
feat: implement bulk embedding method across adapters and add corresp…
ArnabChatterjee20k 57189d8
refactor: update test method names to follow camelCase convention and…
ArnabChatterjee20k 3d5313a
fix: update assertions in AppwriteTest to check for non-empty embeddings
ArnabChatterjee20k 9828fb9
refactor: remove testBulkEmbedEmptyArrayThrows method from Adapter te…
ArnabChatterjee20k 5d61c64
updated
ArnabChatterjee20k 5aad21b
updated
ArnabChatterjee20k File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,256 @@ | ||
| <?php | ||
|
|
||
| namespace Utopia\Agents\Adapters; | ||
|
|
||
| use Utopia\Agents\Adapter; | ||
| use Utopia\Agents\Message; | ||
| use Utopia\Fetch\Client; | ||
|
|
||
/**
 * Embedding-only adapter that talks to an Appwrite embedding service over HTTP.
 *
 * The adapter posts a model name and a list of texts to the configured
 * endpoint and returns the resulting vectors. Chat/messages are not
 * supported: send() always throws.
 */
class Appwrite extends Adapter
{
    /**
     * NomicEmbedTextV15 - default general purpose text embedding model
     */
    public const MODEL_NOMIC_EMBED_TEXT = 'nomic-embed-text';

    /**
     * EmbeddingGemma300M - Gemma embedding model
     */
    public const MODEL_EMBEDDING_GEMMA = 'embedding-gemma';

    /**
     * AllMiniLML6V2 - small, fast sentence embedding model
     */
    public const MODEL_ALL_MINILM = 'all-minilm';

    /**
     * BGESmallENV15 - small English embedding model
     */
    public const MODEL_BGE_SMALL = 'bge-small';

    /**
     * Currently selected embedding model (always one of self::MODELS).
     */
    protected string $model;

    /**
     * URL the embedding requests are posted to; override with setEndpoint().
     */
    private string $endpoint = 'http://appwrite-embedding:11434/embed';

    /**
     * Models accepted by the constructor and setModel().
     */
    public const MODELS = [
        self::MODEL_NOMIC_EMBED_TEXT,
        self::MODEL_EMBEDDING_GEMMA,
        self::MODEL_ALL_MINILM,
        self::MODEL_BGE_SMALL,
    ];

    /**
     * Embedding dimensions of specific embedding model
     */
    protected const DIMENSIONS = [
        self::MODEL_NOMIC_EMBED_TEXT => 768,
        self::MODEL_EMBEDDING_GEMMA => 768,
        self::MODEL_ALL_MINILM => 384,
        self::MODEL_BGE_SMALL => 384,
    ];

    /**
     * Create a new Appwrite embedding adapter (no API key required for local call)
     *
     * @param  string  $model  One of self::MODELS
     * @param  int  $timeout  Request timeout, forwarded to setTimeout()
     *
     * @throws \InvalidArgumentException When the model is not supported
     */
    public function __construct(
        string $model = self::MODEL_NOMIC_EMBED_TEXT,
        int $timeout = 90000
    ) {
        self::assertSupportedModel($model);

        $this->model = $model;
        $this->setTimeout($timeout);
    }

    /**
     * Embedding generation (the embedding service only supports embeddings, not chat)
     *
     * Thin wrapper around bulkEmbed() for a single text.
     *
     * @return array{
     *     embedding: array<int, float>,
     *     tokensProcessed: int|null,
     *     totalDuration: int|null
     * }
     *
     * @throws \Exception
     */
    public function embed(string $text): array
    {
        $result = $this->bulkEmbed([$text]);

        return [
            'embedding' => $result['embeddings'][0],
            'tokensProcessed' => $result['tokensProcessed'],
            'totalDuration' => $result['totalDuration'],
        ];
    }

    /**
     * Bulk embedding generation — sends multiple texts in a single request.
     *
     * @param  array<int, string>  $texts
     * @return array{
     *     embeddings: array<int, array<int, float>>,
     *     tokensProcessed: int|null,
     *     totalDuration: int|null
     * }
     *
     * @throws \InvalidArgumentException When $texts is empty
     * @throws \Exception When the service reports an error or returns a malformed response
     */
    public function bulkEmbed(array $texts): array
    {
        if (empty($texts)) {
            throw new \InvalidArgumentException('bulkEmbed requires at least one text');
        }

        $client = new Client();
        $client->setTimeout($this->timeout);
        $client->addHeader('Content-Type', 'application/json');

        $response = $client->fetch(
            $this->getEndpoint(),
            Client::METHOD_POST,
            [
                'model' => $this->model,
                // Re-index so a sparse or string-keyed input is still sent as a sequential list.
                'texts' => array_values($texts),
            ]
        );

        $statusCode = $response->getStatusCode();
        $body = $response->getBody();
        $json = is_string($body) ? json_decode($body, true) : null;

        if (! is_array($json)) {
            throw new \Exception('Invalid response format received from the API');
        }

        // Surface the service's own message both for explicit `error` payloads and for
        // HTTP error statuses whose body carries no `error` key (e.g. only `message`).
        if (isset($json['error']) || $statusCode >= 400) {
            throw new \Exception($this->formatErrorMessage($json), $statusCode);
        }

        if (! isset($json['embeddings']) || ! is_array($json['embeddings']) || count($json['embeddings']) !== count($texts)) {
            throw new \Exception('Embedding response missing or count mismatch', $statusCode);
        }

        return [
            'embeddings' => $this->normalizeEmbeddings($json['embeddings'], $statusCode),
            'tokensProcessed' => isset($json['tokens']) && is_int($json['tokens']) ? $json['tokens'] : null,
            'totalDuration' => isset($json['total_duration']) && is_int($json['total_duration']) ? $json['total_duration'] : null,
        ];
    }

    /**
     * Get available models for embeddings
     *
     * @return array<string>
     */
    public function getModels(): array
    {
        return self::MODELS;
    }

    /**
     * Get currently selected embedding model
     */
    public function getModel(): string
    {
        return $this->model;
    }

    /**
     * Get the embedding dimension of the current model
     */
    public function getEmbeddingDimension(): int
    {
        return self::DIMENSIONS[$this->model];
    }

    /**
     * Set model to use for embedding
     *
     * @throws \InvalidArgumentException When the model is not supported
     */
    public function setModel(string $model): self
    {
        self::assertSupportedModel($model);
        $this->model = $model;

        return $this;
    }

    /**
     * Not applicable for embedding-only adapters.
     *
     * @param  array<\Utopia\Agents\Message>  $messages
     *
     * @throws \Exception Always
     */
    public function send(array $messages, ?callable $listener = null): Message
    {
        throw new \Exception('Appwrite does not support chat or messages. Use embed() instead.');
    }

    /**
     * Embeddings do not support schema.
     */
    public function isSchemaSupported(): bool
    {
        return false;
    }

    /**
     * Get the adapter name
     */
    public function getName(): string
    {
        return 'appwrite-embedding';
    }

    /**
     * Error formatter (minimal)
     *
     * Prefers the `error` key, then `message`; anything that is not a string
     * collapses to a generic message.
     *
     * @param  mixed  $json
     */
    protected function formatErrorMessage($json): string
    {
        if (! is_array($json)) {
            return '(unknown_error) Unknown error';
        }

        $errorValue = $json['error'] ?? ($json['message'] ?? 'Unknown error');

        return is_string($errorValue) ? $errorValue : 'Unknown error';
    }

    /**
     * Get the API endpoint
     */
    public function getEndpoint(): string
    {
        return $this->endpoint;
    }

    /**
     * Set the API endpoint
     */
    public function setEndpoint(string $endpoint): self
    {
        $this->endpoint = $endpoint;

        return $this;
    }

    /**
     * This adapter supports embeddings.
     */
    public function getSupportForEmbeddings(): bool
    {
        return true;
    }

    /**
     * Reject any model name that is not listed in self::MODELS.
     *
     * @throws \InvalidArgumentException
     */
    private static function assertSupportedModel(string $model): void
    {
        if (! in_array($model, self::MODELS, true)) {
            throw new \InvalidArgumentException("Invalid model: {$model}. Supported models: ".implode(', ', self::MODELS));
        }
    }

    /**
     * Validate the decoded `embeddings` payload and normalise every component to float.
     *
     * JSON numbers without a fractional part decode as int, so values are cast
     * to keep the documented `array<int, float>` row shape.
     *
     * @param  array<mixed>  $rows  Decoded `embeddings` value from the response
     * @param  int  $statusCode  HTTP status used as the exception code
     * @return array<int, array<int, float>>
     *
     * @throws \Exception When a row is missing, empty, or holds a non-numeric value
     */
    private function normalizeEmbeddings(array $rows, int $statusCode): array
    {
        $embeddings = [];

        foreach ($rows as $i => $vec) {
            if (! is_array($vec) || $vec === []) {
                throw new \Exception("Embedding row {$i} missing or empty", $statusCode);
            }

            $row = [];
            foreach ($vec as $value) {
                if (! is_int($value) && ! is_float($value)) {
                    throw new \Exception("Embedding row {$i} contains a non-numeric value", $statusCode);
                }
                $row[] = (float) $value;
            }

            $embeddings[] = $row;
        }

        return $embeddings;
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.