Skip to content

Commit 3b6c972

Browse files
authored
Merge pull request #433 from NHSDigital/apm-3095-run-ecs-cleanup-on-2-day-retain-hours
APM-3095 run ecs cleanup on 2 day retain hours
2 parents 3d467b6 + 74f5324 commit 3b6c972

3 files changed

Lines changed: 67 additions & 28 deletions

File tree

azure/cleanup-ecs-pr-proxies.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,16 @@ schedules:
1414
parameters:
1515
- name: retries
1616
type: object
17+
displayName: Retries
1718
default:
1819
- "0"
1920
- "1"
2021
- "2"
2122
- "3"
23+
- name: retain_hours
24+
displayName: Retain hours
25+
type: string
26+
default: "72"
2227

2328
jobs:
2429
- job: build
@@ -63,6 +68,7 @@ jobs:
6368
- template: ./components/cleanup-ecs-pr-proxies-job.yml
6469
parameters:
6570
retry: '${{ retry }}'
71+
retain_hours: '${{ parameters.retain_hours }}'
6672

6773
- bash: |
6874
echo "AWS role session has timed out after multiple retries"
azure/components/cleanup-ecs-pr-proxies-job.yml

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,31 @@
11
parameters:
22
- name: retry
33
type: string
4+
- name: retain_hours
5+
type: string
46

57
steps:
6-
- template: ./aws-assume-role.yml
7-
parameters:
8-
role: "auto-ops"
9-
profile: "apm_ptl"
8+
- template: ./aws-assume-role.yml
9+
parameters:
10+
role: "auto-ops"
11+
profile: "apm_ptl"
12+
13+
- bash: make remove-stale-locks
14+
displayName: Remove stale locks
1015

11-
- bash: |
12-
make remove-stale-locks
13-
export retain_hours=72
14-
ANSIBLE_FORCE_COLOR=yes make -C ansible remove-old-ecs-pr-deploys
15-
ERROR_CODE=$?
16-
echo ERROR_CODE - $ERROR_CODE
16+
- bash: |
17+
export retain_hours="${{ parameters.retain_hours }}"
18+
ANSIBLE_FORCE_COLOR=yes make -C ansible remove-old-ecs-pr-deploys
19+
ERROR_CODE=$?
20+
echo ERROR_CODE - $ERROR_CODE
1721
18-
if [ $ERROR_CODE -ne 0 ] ; then
19-
echo "\n\nansible has unhandled error, re-trying"
20-
echo "##vso[task.setvariable variable=should_retry;]true"
22+
if [ $ERROR_CODE -ne 0 ] ; then
23+
echo "\n\nansible has unhandled error, re-trying"
24+
echo "##vso[task.setvariable variable=should_retry;]true"
2125
22-
else
23-
echo "##vso[task.setvariable variable=should_retry;]false"
24-
fi
26+
else
27+
echo "##vso[task.setvariable variable=should_retry;]false"
28+
fi
2529
26-
displayName: "cleanup older pr deploys"
27-
condition: or(eq( ${{ parameters.retry }}, '0'), eq(variables['should_retry'], 'true'))
30+
displayName: cleanup older pr deploys
31+
condition: or(eq( ${{ parameters.retry }}, '0'), eq(variables['should_retry'], 'true'))

scripts/terraform_force_unlock.py

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,41 +26,70 @@
2626

2727

2828
@click.command()
29-
@click.option("--min-age-hr", type=int, default=8)
30-
@click.option("--key-prefix", type=str, default="nhsd-apm-management-ptl-terraform/env:/api-deployment:ptl:")
29+
@click.option("--min-age-hr", type=int, default=4)
30+
@click.option(
31+
"--key-prefix",
32+
type=str,
33+
default="nhsd-apm-management-ptl-terraform/env:/api-deployment:ptl:",
34+
)
3135
@click.option("--table-name", type=str, default="terraform-state-lock")
3236
@click.option("--profile", type=str, default="apm_ptl")
3337
def main(min_age_hr, key_prefix, table_name, profile):
34-
3538
accepted_envs = ["apm_ptl", "apm_prod"]
3639

3740
if profile not in accepted_envs:
3841
raise ValueError("Profile must be apm_ptl or apm_prod")
3942

40-
terraform_lock_table = boto3.Session(profile_name=profile).resource("dynamodb").Table(table_name)
43+
terraform_lock_table = (
44+
boto3.Session(profile_name=profile).resource("dynamodb").Table(table_name)
45+
)
4146

4247
filter_expr = "begins_with(#n0, :v0) AND attribute_exists(#n1)"
4348

4449
ExpressionAttributeNames = {"#n0": "LockID", "#n1": "Info"}
4550
ExpressionAttributeValues = {
4651
":v0": key_prefix,
4752
}
48-
items = terraform_lock_table.scan(FilterExpression=filter_expr, ExpressionAttributeNames=ExpressionAttributeNames, ExpressionAttributeValues=ExpressionAttributeValues)
49-
print(f"Found {len(items['Items'])} locks which start with key prefix '{key_prefix}'")
53+
items = terraform_lock_table.scan(
54+
FilterExpression=filter_expr,
55+
ExpressionAttributeNames=ExpressionAttributeNames,
56+
ExpressionAttributeValues=ExpressionAttributeValues,
57+
)
58+
59+
total_items = items["Items"]
60+
61+
while "LastEvaluatedKey" in items:
62+
items = terraform_lock_table.scan(
63+
FilterExpression=filter_expr,
64+
ExpressionAttributeNames=ExpressionAttributeNames,
65+
ExpressionAttributeValues=ExpressionAttributeValues,
66+
ExclusiveStartKey=items["LastEvaluatedKey"],
67+
)
68+
total_items.extend(items["Items"])
69+
70+
print(
71+
f"Found {len(items['Items'])} locks which start with key prefix '{key_prefix}'"
72+
)
5073

5174
removed_count = 0
52-
for lock_item in items["Items"]:
75+
for lock_item in total_items:
5376
lock_item_info = json.loads(lock_item["Info"])
5477
lock_id = lock_item["LockID"]
5578
created_at = dateutil.parser.parse(lock_item_info["Created"])
5679

57-
if datetime.datetime.now(datetime.timezone.utc) - created_at > datetime.timedelta(hours=min_age_hr):
58-
print(f"{lock_id} {created_at=} is more than {min_age_hr} hours old, deleting lock...")
80+
if datetime.datetime.now(
81+
datetime.timezone.utc
82+
) - created_at > datetime.timedelta(hours=min_age_hr):
83+
print(
84+
f"{lock_id} {created_at=} is more than {min_age_hr} hours old, deleting lock..."
85+
)
5986
terraform_lock_table.delete_item(Key={"LockID": lock_id})
6087
removed_count += 1
6188

6289
else:
63-
print(f"{lock_id} {created_at=} is not more than {min_age_hr} hours old, leaving it alone!")
90+
print(
91+
f"{lock_id} {created_at=} is not more than {min_age_hr} hours old, leaving it alone!"
92+
)
6493

6594
print(f"Removed {removed_count} locks")
6695

0 commit comments

Comments
 (0)