Skip to content

Commit 74faae0

Browse files
authored
Merge pull request #435 from NHSDigital/master
APM-4219 Use latest file on build artifact
2 parents 15cc834 + d54a28e commit 74faae0

4 files changed

Lines changed: 79 additions & 36 deletions

File tree

azure/cleanup-ecs-pr-proxies.yml

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ trigger: none
44
pr: none
55

66
schedules:
7-
- cron: "0 2 * * *"
7+
- cron: "0 */4 * * *" # Every 4 hours
88
displayName: Daily PR cleanup
99
branches:
1010
include:
@@ -14,11 +14,16 @@ schedules:
1414
parameters:
1515
- name: retries
1616
type: object
17+
displayName: Retries
1718
default:
1819
- "0"
1920
- "1"
2021
- "2"
2122
- "3"
23+
- name: retain_hours
24+
displayName: Retain hours
25+
type: string
26+
default: "72"
2227

2328
jobs:
2429
- job: build
@@ -47,6 +52,13 @@ jobs:
4752
- bash: |
4853
tfenv use 0.14.6
4954
displayName: setup terraform
55+
56+
- task: s3-cache-action@1
57+
inputs:
58+
key: poetry | utils | poetry.lock
59+
location: ".venv"
60+
debug: true
61+
displayName: cache utils pre-requisites
5062

5163
- bash: |
5264
make install
@@ -56,10 +68,11 @@ jobs:
5668
- template: ./components/cleanup-ecs-pr-proxies-job.yml
5769
parameters:
5870
retry: '${{ retry }}'
71+
retain_hours: '${{ parameters.retain_hours }}'
5972

6073
- bash: |
6174
echo "AWS role session has timed out after multiple retries"
6275
exit -1
6376
displayName: Trigger failure if role has timed out
64-
condition: eq(variables['has_aws_role_timedout'], 'true')
77+
condition: eq(variables['should_retry'], 'true')
6578
azure/components/cleanup-ecs-pr-proxies-job.yml

Lines changed: 23 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,31 @@
11
parameters:
22
- name: retry
33
type: string
4+
- name: retain_hours
5+
type: string
46

57
steps:
6-
- template: ./aws-assume-role.yml
7-
parameters:
8-
role: "auto-ops"
9-
profile: "apm_ptl"
8+
- template: ./aws-assume-role.yml
9+
parameters:
10+
role: "auto-ops"
11+
profile: "apm_ptl"
12+
13+
- bash: make remove-stale-locks
14+
displayName: Remove stale locks
15+
16+
- bash: |
17+
export retain_hours="${{ parameters.retain_hours }}"
18+
ANSIBLE_FORCE_COLOR=yes make -C ansible remove-old-ecs-pr-deploys
19+
ERROR_CODE=$?
20+
echo ERROR_CODE - $ERROR_CODE
1021
11-
- bash: |
12-
make remove-stale-locks
13-
export retain_hours=72
14-
ANSIBLE_FORCE_COLOR=yes make -C ansible remove-old-ecs-pr-deploys | tee /tmp/output.txt
15-
ERROR_CODE=$?
16-
ROLE_TIMEOUT_MSG="The AWS assume role session token is due to expire"
17-
if grep -q "$ROLE_TIMEOUT_MSG" /tmp/output.txt ; then
18-
echo "stderr for ansible has the error \"$ROLE_TIMEOUT_MSG\""
19-
echo "Re-assuming role and re-running step"
20-
echo "##vso[task.setvariable variable=has_aws_role_timedout;]true"
22+
if [ $ERROR_CODE -ne 0 ] ; then
23+
echo "\n\nansible has unhandled error, re-trying"
24+
echo "##vso[task.setvariable variable=should_retry;]true"
2125
22-
elif [ $ERROR_CODE -ne 0 ] ; then
23-
echo "\n\nansible has unhandled error, re-raising"
24-
exit -1
25-
else
26-
echo "##vso[task.setvariable variable=has_aws_role_timedout;]false"
27-
fi
26+
else
27+
echo "##vso[task.setvariable variable=should_retry;]false"
28+
fi
2829
29-
displayName: "cleanup older pr deploys"
30-
condition: or(eq( ${{ parameters.retry }}, '0'), eq(variables['has_aws_role_timedout'], 'true'))
30+
displayName: cleanup older pr deploys
31+
condition: or(eq( ${{ parameters.retry }}, '0'), eq(variables['should_retry'], 'true'))

azure/components/set-facts.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@ parameters:
1919
steps:
2020
- bash: |
2121
set -euo pipefail
22-
echo "!!! If you get an error here, it is because '${{ parameters.service_name }}' is not the source alias name of the artifact"
22+
echo "!!! Using ls -t will set the latest file for the artifact. If you get an error here, it is because '${{ parameters.service_name }}' is not the source alias name of the artifact"
2323
ls -R $(Pipeline.Workspace)
24-
export SERVICE_ARTIFACT_NAME=`ls $(Pipeline.Workspace)/s/${{ parameters.service_name }}`
24+
export SERVICE_ARTIFACT_NAME=`ls -t $(Pipeline.Workspace)/s/${{ parameters.service_name }}`
2525
echo "##vso[task.setvariable variable=SERVICE_ARTIFACT_NAME]$SERVICE_ARTIFACT_NAME"
2626
echo "Set Artifact Name of: $SERVICE_ARTIFACT_NAME"
2727
displayName: 'Set SERVICE_ARTIFACT_NAME'

scripts/terraform_force_unlock.py

Lines changed: 39 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -26,41 +26,70 @@
2626

2727

2828
@click.command()
29-
@click.option("--min-age-hr", type=int, default=8)
30-
@click.option("--key-prefix", type=str, default="nhsd-apm-management-ptl-terraform/env:/api-deployment:ptl:")
29+
@click.option("--min-age-hr", type=int, default=4)
30+
@click.option(
31+
"--key-prefix",
32+
type=str,
33+
default="nhsd-apm-management-ptl-terraform/env:/api-deployment:ptl:",
34+
)
3135
@click.option("--table-name", type=str, default="terraform-state-lock")
3236
@click.option("--profile", type=str, default="apm_ptl")
3337
def main(min_age_hr, key_prefix, table_name, profile):
34-
3538
accepted_envs = ["apm_ptl", "apm_prod"]
3639

3740
if profile not in accepted_envs:
3841
raise ValueError("Profile must be apm_ptl or apm_prod")
3942

40-
terraform_lock_table = boto3.Session(profile_name=profile).resource("dynamodb").Table(table_name)
43+
terraform_lock_table = (
44+
boto3.Session(profile_name=profile).resource("dynamodb").Table(table_name)
45+
)
4146

4247
filter_expr = "begins_with(#n0, :v0) AND attribute_exists(#n1)"
4348

4449
ExpressionAttributeNames = {"#n0": "LockID", "#n1": "Info"}
4550
ExpressionAttributeValues = {
4651
":v0": key_prefix,
4752
}
48-
items = terraform_lock_table.scan(FilterExpression=filter_expr, ExpressionAttributeNames=ExpressionAttributeNames, ExpressionAttributeValues=ExpressionAttributeValues)
49-
print(f"Found {len(items['Items'])} locks which start with key prefix '{key_prefix}'")
53+
items = terraform_lock_table.scan(
54+
FilterExpression=filter_expr,
55+
ExpressionAttributeNames=ExpressionAttributeNames,
56+
ExpressionAttributeValues=ExpressionAttributeValues,
57+
)
58+
59+
total_items = items["Items"]
60+
61+
while "LastEvaluatedKey" in items:
62+
items = terraform_lock_table.scan(
63+
FilterExpression=filter_expr,
64+
ExpressionAttributeNames=ExpressionAttributeNames,
65+
ExpressionAttributeValues=ExpressionAttributeValues,
66+
ExclusiveStartKey=items["LastEvaluatedKey"],
67+
)
68+
total_items.extend(items["Items"])
69+
70+
print(
71+
f"Found {len(items['Items'])} locks which start with key prefix '{key_prefix}'"
72+
)
5073

5174
removed_count = 0
52-
for lock_item in items["Items"]:
75+
for lock_item in total_items:
5376
lock_item_info = json.loads(lock_item["Info"])
5477
lock_id = lock_item["LockID"]
5578
created_at = dateutil.parser.parse(lock_item_info["Created"])
5679

57-
if datetime.datetime.now(datetime.timezone.utc) - created_at > datetime.timedelta(hours=min_age_hr):
58-
print(f"{lock_id} {created_at=} is more than {min_age_hr} hours old, deleting lock...")
80+
if datetime.datetime.now(
81+
datetime.timezone.utc
82+
) - created_at > datetime.timedelta(hours=min_age_hr):
83+
print(
84+
f"{lock_id} {created_at=} is more than {min_age_hr} hours old, deleting lock..."
85+
)
5986
terraform_lock_table.delete_item(Key={"LockID": lock_id})
6087
removed_count += 1
6188

6289
else:
63-
print(f"{lock_id} {created_at=} is not more than {min_age_hr} hours old, leaving it alone!")
90+
print(
91+
f"{lock_id} {created_at=} is not more than {min_age_hr} hours old, leaving it alone!"
92+
)
6493

6594
print(f"Removed {removed_count} locks")
6695

0 commit comments

Comments
 (0)