|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import minio |
| 4 | +import workflows.recipe |
| 5 | +from workflows.services.common_service import CommonService |
| 6 | + |
| 7 | +from dlstbx.util import iris |
| 8 | +from dlstbx.util.iris import get_minio_client, update_dcid_info_file |
| 9 | + |
| 10 | + |
class S3EchoCollector(CommonService):
    """
    A service that keeps status of uploads to S3 Echo object store and does
    garbage collection of unreferenced data.

    Two recipe queues are consumed:

    * ``s3echo.start`` -- handled by :meth:`on_start`, which records that a
      processing job references a data collection and, when needed, asks the
      ``upload`` recipe output to transfer the image files.
    * ``s3echo.end`` -- handled by :meth:`on_end`, which drops the job's
      reference again and deletes the data collection's objects once they are
      no longer needed.

    The per data collection bookkeeping is delegated to
    ``dlstbx.util.iris.update_dcid_info_file``.
    """

    # Human readable service name
    _service_name = "S3EchoCollector"

    # Logger name
    _logger_name = "dlstbx.services.s3echocollector"

    # STFC S3 Echo credentials
    _s3echo_credentials = "/dls_sw/apps/zocalo/secrets/credentials-echo-mx.cfg"

    def initializing(self):
        """
        Create the S3 Echo client and subscribe to the ``s3echo.start`` and
        ``s3echo.end`` queues.
        """
        self.log.info(f"{S3EchoCollector._service_name} starting")

        self.minio_client: minio.Minio = get_minio_client(
            S3EchoCollector._s3echo_credentials
        )

        # NOTE(review): not read anywhere in this class; presumably a delay in
        # seconds kept for parity with related services -- confirm before removing.
        self._message_delay = 5

        workflows.recipe.wrap_subscribe(
            self._transport,
            "s3echo.start",
            self.on_start,
            acknowledgement=True,
            log_extender=self.extend_log,
        )

        workflows.recipe.wrap_subscribe(
            self._transport,
            "s3echo.end",
            self.on_end,
            acknowledgement=True,
            log_extender=self.extend_log,
        )

    def on_start(self, rw, header, message):
        """
        Process request for uploading images to S3 Echo object store.

        Recipe step parameters read here:

        * ``bucket`` -- bucket name; the bucket is created if it does not exist.
        * ``rpid`` -- processing job id, converted to ``int``.
        * ``images`` together with ``dcid`` -- image specification of a single
          data collection, or
        * ``related_images`` -- iterable of ``(dcid, image_master_file)`` pairs,
          only used when ``images`` is empty or missing.

        For every data collection the info file is updated through
        ``update_dcid_info_file``. If that call returns a falsy value a message
        is sent to the ``upload`` output (presumably because the data has not
        been uploaded yet -- confirm against ``dlstbx.util.iris``). The files
        found are stored in the recipe environment under ``s3echo_upload`` and
        the incoming message is forwarded to the ``watch`` output.

        Failures while looking up image files are logged and do not stop the
        ``watch`` message from being sent.
        """
        # Conditionally acknowledge receipt of the message
        txn = rw.transport.transaction_begin(subscription_id=header["subscription"])
        rw.transport.ack(header, transaction=txn)

        params = rw.recipe_step["parameters"]
        minio_client = get_minio_client(S3EchoCollector._s3echo_credentials)

        bucket_name = params["bucket"]
        if not minio_client.bucket_exists(bucket_name):
            minio_client.make_bucket(bucket_name)
        rpid = int(params["rpid"])

        # Maps file name -> (dcid, path) for every file this job depends on.
        s3echo_upload_files = {}
        if images := params.get("images"):
            dcid = int(params["dcid"])
            response_info = update_dcid_info_file(
                minio_client, bucket_name, dcid, 0, rpid, self.log
            )
            # Stays None when the lookup below fails, so that we never try to
            # request an upload for a file list that was not produced.
            image_files = None
            try:
                image_files = iris.get_image_files(images, self.log)
                s3echo_upload_files.update(
                    {name: (dcid, pth) for name, pth in image_files.items()}
                )
            except Exception:
                self.log.exception("Error uploading image files to S3 Echo")
            if image_files is not None and not response_info:
                self.log.debug("Sending message to upload endpoint")
                rw.send_to(
                    "upload", {"s3echo_upload": {dcid: image_files}}, transaction=txn
                )
            rw.environment.update({"s3echo_upload": s3echo_upload_files})
            self.log.debug("Sending message to watch endpoint")
            rw.send_to("watch", message, transaction=txn)
        elif params.get("related_images"):
            for dcid, image_master_file in params.get("related_images"):
                response_info = update_dcid_info_file(
                    minio_client, bucket_name, dcid, 0, rpid, self.log
                )
                try:
                    image_files = iris.get_related_images_files_from_h5(
                        image_master_file, self.log
                    )
                    s3echo_upload_files.update(
                        {name: (dcid, pth) for name, pth in image_files.items()}
                    )
                    if not response_info:
                        self.log.debug("Sending message to upload endpoint")
                        rw.send_to(
                            "upload",
                            {"s3echo_upload": {dcid: image_files}},
                            transaction=txn,
                        )
                except Exception:
                    # One broken data collection must not block the others.
                    self.log.exception("Error uploading image files to S3 Echo")
            rw.environment.update({"s3echo_upload": s3echo_upload_files})
            self.log.debug("Sending message to watch endpoint")
            rw.send_to("watch", message, transaction=txn)
        rw.transport.transaction_commit(txn)

    def on_end(self, rw, header, message):
        """
        Remove reference to image data in S3 Echo object store after end of processing.

        Recipe step parameters read here:

        * ``bucket`` -- bucket name.
        * ``rpid`` -- processing job id, converted to ``int``.
        * ``related_images`` -- iterable of ``(dcid, image_master_file)`` pairs.
          When it is missing (or ``None``) the single data collection given by
          ``dcid`` is used instead.

        For each data collection the current info is read with
        ``update_dcid_info_file``. All objects whose name starts with
        ``"<dcid>_"`` are removed from the bucket when the info reports
        ``status == -1``, or ``status == 1`` with ``pid == [rpid]`` (i.e. this
        job is the only entry left). Otherwise ``update_dcid_info_file`` is
        called with ``-rpid`` (presumably to drop this job's reference --
        confirm against ``dlstbx.util.iris``).
        """
        # Conditionally acknowledge receipt of the message
        txn = rw.transport.transaction_begin(subscription_id=header["subscription"])
        rw.transport.ack(header, transaction=txn)

        params = rw.recipe_step["parameters"]
        minio_client = get_minio_client(S3EchoCollector._s3echo_credentials)
        bucket_name = params["bucket"]
        rpid = int(params["rpid"])

        # Build the single-dcid fallback lazily: recipes that provide
        # "related_images" are not required to provide "dcid" as well.
        related_images = params.get("related_images")
        if related_images is None:
            related_images = [(int(params["dcid"]), None)]

        for dcid, _ in related_images:
            response_info = update_dcid_info_file(
                minio_client, bucket_name, dcid, None, None, self.log
            )
            if not response_info:
                self.log.warning(f"No {dcid}_info data read from the object store")
            elif response_info["status"] == -1 or (
                response_info["status"] == 1 and response_info["pid"] == [rpid]
            ):
                # Let the object store filter by prefix instead of listing the
                # whole bucket, and collect the names before deleting anything
                # so the listing is not modified while it is being consumed.
                dc_objects = [
                    obj.object_name
                    for obj in minio_client.list_objects(
                        bucket_name, prefix=f"{dcid}_"
                    )
                    if obj.object_name is not None
                ]
                for obj_name in dc_objects:
                    minio_client.remove_object(bucket_name, obj_name)
            else:
                update_dcid_info_file(
                    minio_client, bucket_name, dcid, None, -rpid, self.log
                )

        rw.transport.transaction_commit(txn)
0 commit comments