cogstack-platform/helm-charts/medcat-service-helm/values.yaml at b2c0fa32581bbed818099f80c523ff11fdd9b0b8 · CogStack/cogstack-platform · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
# Default values for medcat-service.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# -- Sets the replica count. More information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
replicaCount: 1

# -- Sets the container image. More information can be found here: https://kubernetes.io/docs/concepts/containers/images/
image:
  # -- Image repository for the MedCAT service container
  repository: cogstacksystems/medcat-service
  # Alternative repository (the GPU image, going by its name):
  # repository: cogstacksystems/medcat-service-gpu
  # -- Sets the pull policy for images.
  pullPolicy: IfNotPresent
  # Overrides the image tag, whose default is the chart appVersion.
  # tag: "latest"

# Environment variables for the MedCAT service, given as a name -> value map.
# NOTE(review): a few values below are unquoted YAML booleans while the rest are quoted
# strings — confirm the chart template quotes values when rendering the container env.
env:
  APP_ENABLE_DEMO_UI: true

  # -- Path of the model pack used by the MedCAT service
  # Example (download on startup): uncomment `ENABLE_MODEL_DOWNLOAD` and the `MODEL_*` URLs below.
  # Example (DeID mode): uncomment `DEID_MODE`/`DEID_REDACT` and use the DeID model pack referenced below.
  APP_MEDCAT_MODEL_PACK: "/cat/models/examples/example-medcat-v2-model-pack.zip"

  # Alternatively, download a model on startup from a URL
  # ENABLE_MODEL_DOWNLOAD: "true"
  # MODEL_NAME: "medmen"
  # MODEL_VOCAB_URL: "https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/vocab.dat"
  # MODEL_CDB_URL: "https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/cdb-medmen-v1.dat"
  # MODEL_META_URL: "https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/mc_status.zip"
  # APP_MODEL_CDB_PATH: "/cat/models/medmen/cdb.dat"

  # Enable DeID mode with the following (replaces the APP_MEDCAT_MODEL_PACK value set above)
  # APP_MEDCAT_MODEL_PACK: "/cat/models/examples/example-deid-model-pack.zip"
  # DEID_MODE: true
  # DEID_REDACT: true

  # -- Set to a high number instead of the default 1000; rely on Kubernetes to restart the pod when needed.
  # Example (tuning): see the commented `SERVER_GUNICORN_EXTRA_ARGS` setting below.
  SERVER_GUNICORN_MAX_REQUESTS: "100000"

  # Recommended env vars when trying to limit the service to 1 CPU for scaling
  # OMP_NUM_THREADS: "1"
  # OPENBLAS_NUM_THREADS: "1"
  # MKL_NUM_THREADS: "1"
  # VECLIB_MAXIMUM_THREADS: "1"
  # NUMEXPR_NUM_THREADS: "1"
  # TOKENIZERS_PARALLELISM: "false"
  # PYTORCH_ENABLE_MPS_FALLBACK: "1"
  # SERVER_GUNICORN_EXTRA_ARGS: "--worker-connections 1 --backlog 1"

  # Recommended env vars for GPU support
  # APP_CUDA_DEVICE_COUNT: "1"
  # APP_TORCH_THREADS: "-1"

  # -- Observability env vars: metrics are enabled and tracing is disabled by default
  APP_ENABLE_METRICS: true
  APP_ENABLE_TRACING: false
  # Placeholder endpoint (tracing is off above); point this at a real OTLP collector when enabling tracing.
  OTEL_EXPORTER_OTLP_ENDPOINT: "http://<unused>:4317"
  OTEL_SERVICE_NAME: "medcat-service"
  OTEL_TRACES_EXPORTER: "otlp"
  OTEL_EXPORTER_OTLP_PROTOCOL: "grpc"
  OTEL_EXPERIMENTAL_RESOURCE_DETECTORS: "containerid,os"
  # The $(K8S_*) references use the variable names defined under `envValueFrom` in this file.
  OTEL_RESOURCE_ATTRIBUTES: "k8s.pod.uid=$(K8S_POD_UID),k8s.pod.name=$(K8S_POD_NAME),k8s.namespace.name=$(K8S_POD_NAMESPACE),k8s.node.name=$(K8S_NODE_NAME)"
  OTEL_METRICS_EXPORTER: "none"
  OTEL_LOGS_EXPORTER: "none"
  # Excludes the health and metrics URLs from FastAPI instrumentation.
  OTEL_PYTHON_FASTAPI_EXCLUDED_URLS: "/api/health,/metrics"

# -- Enable downloading of public models using wget on startup. The model is downloaded to /models/<name> and used for APP_MEDCAT_MODEL_PACK
# Example: uncomment `model.downloadUrl` and `model.name` below to fetch a model pack at startup.
# When uncommenting the keys below, also remove the curly braces after 'model:'.
model: {}
  # Public URL to download a model pack from
  # downloadUrl: "http://localhost:9000/models/my-model.zip"
  # File name to save the model pack under. It will be stored at /models/<name>
  # name: my-model.zip

# -- Env values taken from field/configmap/secret references instead of literals. The defaults expose k8s pod details for observability.
# Entries are listed in the order they are referenced by `env.OTEL_RESOURCE_ATTRIBUTES`.
envValueFrom:
  # UID of the pod
  K8S_POD_UID:
    fieldRef:
      fieldPath: metadata.uid
  # Name of the pod
  K8S_POD_NAME:
    fieldRef:
      fieldPath: metadata.name
  # Namespace the pod runs in
  K8S_POD_NAMESPACE:
    fieldRef:
      fieldPath: metadata.namespace
  # Name of the node the pod is scheduled on
  K8S_NODE_NAME:
    fieldRef:
      fieldPath: spec.nodeName

# -- Secrets for pulling an image from a private repository. More information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets: []
# -- Overrides the chart name.
nameOverride: ""
# NOTE(review): undocumented in the original; presumably overrides the fully generated
# resource name — confirm against the chart's name helpers.
fullnameOverride: ""

# Update strategy applied to the Deployment.
updateStrategy:
  # -- Used for Kubernetes deployment .spec.strategy.type. Allowed values are "Recreate" or "RollingUpdate".
  type: RollingUpdate
  # Example showing the default RollingUpdate options:
  # rollingUpdate:
  #   maxSurge: 25%
  #   maxUnavailable: 25%

# This section configures the service account. More information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
  # -- Specifies whether a service account should be created
  create: true
  # -- Whether to automatically mount the ServiceAccount's API credentials
  automount: true
  # -- Annotations to add to the service account
  annotations: {}
  # -- The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# -- Kubernetes annotations to add to the pod.
# For more information, check out: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
podAnnotations: {}
# -- Kubernetes labels to add to the pod.
# For more information, check out: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
podLabels: {}

# Pod-level security context; empty by default.
# NOTE(review): presumably rendered into the pod spec's securityContext — confirm against the deployment template.
# To set values, uncomment the example below and remove the curly braces after 'podSecurityContext:'.
podSecurityContext: {}
  # fsGroup: 2000

# Security context settings; empty by default.
# NOTE(review): presumably applied to the main container (as opposed to podSecurityContext) — confirm against the deployment template.
# To set values, uncomment the example below and remove the curly braces after 'securityContext:'.
securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

# This section configures the service. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
service:
  # -- Sets the service type. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
  type: ClusterIP
  # -- Sets the service port. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
  port: 5000

# This block configures the ingress (disabled by default). More information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  # Optional alternative to hosts: allows for a global ingress without a hostname. Overrides hosts if both are set.
  # http:
  #   - paths:
  #       - path: /medcat-service
  #         pathType: ImplementationSpecific
  tls: []
  # Example:
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

# -- Configure resources for the pod. More information can be found here: https://kubernetes.io/docs/concepts/containers/
# Recommendation for a default production model is { requests: { cpu: 1, memory: 4Gi }, limits: { memory: 4Gi } } with the CPU limit left unset
resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 2 # Note: one general k8s recommendation is to leave the CPU limit unset
  #   memory: 4Gi
  # requests:
  #   cpu: 1
  #   memory: 2Gi

# Liveness, readiness and startup probes for the container. More information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
# All three probes issue an HTTP GET against the port named `http`.
# Liveness uses the /api/health/live endpoint.
livenessProbe:
  httpGet:
    path: /api/health/live
    port: http
# Readiness uses the /api/health/ready endpoint.
readinessProbe:
  httpGet:
    path: /api/health/ready
    port: http
# Startup reuses the readiness endpoint. With the timings below, the first check runs after
# 2 s and up to 30 failures at 10 s intervals are tolerated (about 300 s in total).
startupProbe:
  httpGet:
    path: /api/health/ready
    port: http
  initialDelaySeconds: 2
  periodSeconds: 10
  failureThreshold: 30

# This section configures autoscaling (disabled by default). More information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # Optional memory utilization target, commented out by default:
  # targetMemoryUtilizationPercentage: 80

# -- Additional volumes on the output Deployment definition.
volumes: []
# Example:
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# -- Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# Example:
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true

# -- Runtime class name for the pod (e.g., "nvidia" for GPU workloads). Empty by default.
# More information: https://kubernetes.io/docs/concepts/containers/runtime-class/
runtimeClassName: ""

# Pod scheduling settings, all empty by default.
# NOTE(review): presumably passed through to the pod spec's nodeSelector, tolerations and
# affinity fields — confirm against the deployment template.
nodeSelector: {}

tolerations: []

affinity: {}

# -- Host aliases for the pod
hostAliases: []
  # Example:
  # - ip: "127.0.0.1"
  #   hostnames:
  #   - "foo.local"

# Network policy settings for the service.
networkPolicy:
  # -- Choose to create a default network policy blocking all ingress other than to the service port.
  enabled: true
  egress:
    # -- Set to true to also block egress through the network policy
    enabled: false
    # -- Custom egress rules to append, following the standard NetworkPolicy egress rule format
    egressRules: []
      # # Example format
      # - to:
      #     - podSelector:
      #         matchLabels:
      #           app.kubernetes.io/name: model-downloader
      #   ports:
      #     - port: 5000

# -- Additional init containers to run before the main container. Can be templated.
extraInitContainers: []

# -- Additional manifests to deploy to Kubernetes. Can be templated.
extraManifests: []

# -- Create a Prometheus ServiceMonitor for the medcat service. Requires the Prometheus Operator to be installed.
# Ensure APP_ENABLE_METRICS is set to true to expose the /metrics endpoint.
serviceMonitor:
  # -- Set to true to enable creation of a ServiceMonitor resource
  enabled: false
  # -- HTTP path where metrics are exposed.
  path: /metrics
  # -- Scheme to use for scraping.
  scheme: http
  # -- Frequency at which Prometheus will scrape metrics.
  interval: 10s
  # -- Additional labels to be added to the ServiceMonitor
  labels: {}
  # NOTE(review): undocumented in the original; presumably the TLS settings for the scrape
  # endpoint — confirm against the ServiceMonitor template.
  tlsConfig: {}