Skip to content
This repository was archived by the owner on Nov 3, 2021. It is now read-only.

Commit 1e7d2fa

Browse files
authored
Merge pull request #1462 from mozilla/fixup_duplicates_geomodel
Add test examples for geomodel alerts
2 parents 740b8ee + 7e9e6de commit 1e7d2fa

13 files changed

Lines changed: 1467 additions & 210 deletions

File tree

alerts/generic_alert_loader.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,10 @@ def main(self):
115115
for cfg in self.configs:
116116
try:
117117
self.process_alert(cfg)
118-
except Exception:
118+
except Exception as err:
119+
self.error_thrown = err
119120
traceback.print_exc(file=sys.stdout)
120-
logger.error("Processing rule file {} failed".format(cfg.__str__()))
121+
logger.exception("Processing rule file {} failed".format(cfg.__str__()))
121122

122123
def onAggregation(self, aggreg):
123124
# aggreg['count']: number of items in the aggregation, ex: number of failed login attempts

alerts/geomodel/alert.py

Lines changed: 56 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
import math
22
from operator import attrgetter
3-
from typing import NamedTuple, Optional
3+
from typing import List, NamedTuple, Optional
44

5-
from .locality import State, Locality
5+
from .locality import Locality, distance as geo_distance
66

77

8-
_AIR_TRAVEL_SPEED = 1000.0 # km/h
9-
10-
_EARTH_RADIUS = 6373.0 # km # approximate
8+
_AIR_TRAVEL_SPEED = 277.778 # m/s
119

1210
# TODO: Switch to dataclasses when we move to Python3.7+
1311

@@ -16,20 +14,30 @@ class Origin(NamedTuple):
1614
'''A description of a location.
1715
'''
1816

17+
ip: str
1918
city: str
2019
country: str
2120
latitude: float
2221
longitude: float
2322
geopoint: str
2423

2524

25+
class Hop(NamedTuple):
26+
'''Describes a hop from one location to another that would be
27+
physically impossible in the time between a user's activity in each
28+
location.
29+
'''
30+
31+
origin: Origin
32+
destination: Origin
33+
34+
2635
class Alert(NamedTuple):
2736
'''A container for the data the alerts output by GeoModel contain.
2837
'''
2938

3039
username: str
31-
sourceipaddress: str
32-
origin: Origin
40+
hops: List[Hop]
3341

3442

3543
def _travel_possible(loc1: Locality, loc2: Locality) -> bool:
@@ -38,53 +46,61 @@ def _travel_possible(loc1: Locality, loc2: Locality) -> bool:
3846
actions took place.
3947
'''
4048

41-
lat1 = math.radians(loc1.latitude)
42-
lat2 = math.radians(loc2.latitude)
43-
lon1 = math.radians(loc1.longitude)
44-
lon2 = math.radians(loc2.longitude)
49+
dist_traveled = 1000 * geo_distance(loc1, loc2) # Convert to metres
4550

46-
dlat = lat2 - lat1
47-
dlon = lon2 - lon1
48-
49-
a = math.sin(dlat / 2.0) ** 2 +\
50-
math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2.0) ** 2
51-
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
52-
53-
distance = c * _EARTH_RADIUS
54-
55-
seconds_between = (loc2.lastaction - loc1.lastaction).total_seconds()
56-
hours_between = math.ceil(seconds_between / 60.0 / 60.0)
51+
seconds_between = abs((loc2.lastaction - loc1.lastaction).total_seconds())
5752

5853
# We pad the time with an hour to account for things like planes being
5954
# slowed, network delays, etc.
60-
return (distance / _AIR_TRAVEL_SPEED) <= (hours_between - 1)
55+
ttt = (dist_traveled / _AIR_TRAVEL_SPEED) # Time to travel the distance.
56+
pad = math.ceil((1000 * min(loc1.radius, loc2.radius)) / _AIR_TRAVEL_SPEED)
57+
58+
return (ttt - pad) <= seconds_between
6159

6260

63-
def alert(user_state: State) -> Optional[Alert]:
61+
def alert(
62+
username: str,
63+
from_evts: List[Locality],
64+
from_es: List[Locality]
65+
) -> Optional[Alert]:
6466
'''Determine whether an alert should fire given a particular user's
6567
locality state. If an alert should fire, an `Alert` is returned, otherwise
6668
this function returns `None`.
6769
'''
6870

69-
locs_to_consider = sorted(user_state.localities, key=attrgetter('lastaction'))
71+
relevant_es = sorted(from_es, key=attrgetter('lastaction'), reverse=True)[0:1]
72+
all_evts = sorted(from_evts, key=attrgetter('lastaction'))
73+
locs_to_consider = relevant_es + all_evts
7074

7175
if len(locs_to_consider) < 2:
7276
return None
7377

74-
locations = locs_to_consider[-2:]
75-
76-
if _travel_possible(*locations):
78+
pairs = [
79+
(locs_to_consider[i], locs_to_consider[i + 1])
80+
for i in range(len(locs_to_consider) - 1)
81+
]
82+
83+
hops = [
84+
Hop(
85+
Origin(
86+
o.sourceipaddress,
87+
o.city,
88+
o.country,
89+
o.latitude,
90+
o.longitude,
91+
'{},{}'.format(o.latitude, o.longitude)),
92+
Origin(
93+
d.sourceipaddress,
94+
d.city,
95+
d.country,
96+
d.latitude,
97+
d.longitude,
98+
'{},{}'.format(d.latitude, d.longitude)))
99+
for (o, d) in pairs
100+
if not _travel_possible(o, d)
101+
]
102+
103+
if len(hops) == 0:
77104
return None
78105

79-
(ip, city, country, lat, lon) = (
80-
locations[1].sourceipaddress,
81-
locations[1].city,
82-
locations[1].country,
83-
locations[1].latitude,
84-
locations[1].longitude
85-
)
86-
87-
geo = '{0},{1}'.format(lat, lon)
88-
origin = Origin(city, country, lat, lon, geo)
89-
90-
return Alert(user_state.username, ip, origin)
106+
return Alert(username, hops)

alerts/geomodel/config.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ class Events(NamedTuple):
1818

1919
search_window: dict
2020
lucene_query: str
21-
username_path: str
2221

2322

2423
class Whitelist(NamedTuple):
@@ -38,5 +37,5 @@ class Config(NamedTuple):
3837
'''
3938

4039
localities: Localities
41-
events: List[Events]
40+
events: Events
4241
whitelist: Whitelist

alerts/geomodel/execution.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
from datetime import datetime
2+
from typing import Callable, NamedTuple, Optional
3+
4+
from mozdef_util.elasticsearch_client import ElasticsearchClient as ESClient
5+
from mozdef_util.query_models import SearchQuery, TermMatch
6+
from mozdef_util.utilities.toUTC import toUTC
7+
8+
9+
_TYPE_NAME = 'execution_state'
10+
11+
12+
class ExecutionState(NamedTuple):
13+
'''A record of an alert's execution at a particular time, used to create a
14+
sliding window through which an alert can query for relevant events and
15+
not run the risk of missing any due to relying only on searching some
16+
configured amount of time in the past.
17+
'''
18+
19+
type_: str
20+
# alert_name: str
21+
execution_time: datetime
22+
23+
def new(executed_at: Optional[datetime]=None) -> 'ExecutionState':
24+
'''Construct a new `ExecutionState` representing the execution of an
25+
alert at a specific time.
26+
By default, the execution time will be set to when this function is
27+
called if not explicitly provided.
28+
'''
29+
30+
if executed_at is None:
31+
executed_at = toUTC(datetime.now())
32+
33+
return ExecutionState(_TYPE_NAME, executed_at)
34+
35+
36+
class Record(NamedTuple):
37+
'''A container for data identifying an `ExecutionState` in ElasticSearch.
38+
'''
39+
40+
identifier: Optional[str]
41+
state: ExecutionState
42+
43+
def new(state: ExecutionState) -> 'Record':
44+
'''Construct a new `Record` that, when stored, will result in a new
45+
document being inserted into ElasticSearch.
46+
'''
47+
48+
return Record('', state)
49+
50+
51+
Index = str
52+
StoreInterface = Callable[[Record, Index], None]
53+
LoadInterface = Callable[[Index], Optional[Record]]
54+
55+
56+
def _dict_take(dictionary, keys):
57+
return {key: dictionary[key] for key in keys}
58+
59+
60+
def store(client: ESClient) -> StoreInterface:
61+
'''Wrap an `ElasticsearchClient` in a `StoreInterface` closure to be
62+
invoked without requiring direct access to the client in order to
63+
persist an `ExecutionState`.
64+
'''
65+
66+
def wrapper(record: Record, esindex: Index):
67+
doc = dict(record.state._asdict())
68+
69+
client.save_object(index=esindex, body=doc, doc_id=record.identifier)
70+
71+
return wrapper
72+
73+
74+
def load(client: ESClient) -> LoadInterface:
75+
'''Wrap an `ElasticsearchClient` in a `LoadInterface` closure to be
76+
invoed without requiring direct access to the client in order to retrieve
77+
an `ExecutionState`.
78+
'''
79+
80+
def wrapper(esindex: Index=None) -> Optional[Record]:
81+
query = SearchQuery()
82+
query.add_must(TermMatch('type_', _TYPE_NAME))
83+
84+
results = query.execute(client, indices=[esindex])
85+
86+
if len(results['hits']) == 0:
87+
return None
88+
89+
eid = results['hits'][0]['_id']
90+
91+
state = ExecutionState(**_dict_take(
92+
results['hits'][0].get('_source', {}),
93+
ExecutionState._fields))
94+
95+
return Record(eid, state)
96+
97+
return wrapper

alerts/geomodel/locality.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import math
12
from datetime import datetime, timedelta
23
from typing import Any, Callable, Dict, List, NamedTuple, Optional
34

@@ -9,6 +10,8 @@
910
# Default radius (in Kilometres) that a locality should have.
1011
_DEFAULT_RADIUS_KM = 50.0
1112

13+
_EARTH_RADIUS = 6373.0 # km # approximate
14+
1215
# TODO: Switch to dataclasses when we move to Python3.7+
1316

1417

@@ -62,17 +65,6 @@ class Update(NamedTuple):
6265
state: State
6366
did_update: bool
6467

65-
def flat_map(fn: Callable[[State], 'Update'], u: 'Update') -> 'Update':
66-
'''Apply a function to a `State` that produces an `Update` against the
67-
state contained within an established `Update`. The resulting `Update`
68-
will have its `did_update` field set to `True` if either the original
69-
or the new `Update` are `True`.
70-
'''
71-
72-
new = fn(u.state)
73-
74-
return Update(new.state, u.did_update or new.did_update)
75-
7668

7769
JournalInterface = Callable[[Entry, str], None]
7870
QueryInterface = Callable[[SearchQuery, str], Optional[Entry]]
@@ -191,7 +183,7 @@ def update(state: State, from_evt: State) -> Update:
191183
# If we find that the new state's locality has been recorded
192184
# for the user in question, we only want to update it if either
193185
# their IP changed or the new time of activity is more recent.
194-
if loc1.city == loc2.city and loc1.country == loc2.country:
186+
if distance(loc1, loc2) <= min(loc1.radius, loc2.radius):
195187
did_find = True
196188

197189
new_more_recent = loc1.lastaction > loc2.lastaction
@@ -229,3 +221,23 @@ def remove_outdated(state: State, days_valid: int) -> Update:
229221
return Update(
230222
state=State(state.type_, state.username, new_localities),
231223
did_update=len(new_localities) != len(state.localities))
224+
225+
226+
def distance(loc1: Locality, loc2: Locality) -> float:
227+
'''Compute the distance between two localities, returning the geographical
228+
distance in kilometres (KM).
229+
'''
230+
231+
lat1 = math.radians(loc1.latitude)
232+
lat2 = math.radians(loc2.latitude)
233+
lon1 = math.radians(loc1.longitude)
234+
lon2 = math.radians(loc2.longitude)
235+
236+
dlat = lat2 - lat1
237+
dlon = lon2 - lon1
238+
239+
a = math.sin(dlat / 2.0) ** 2 +\
240+
math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2.0) ** 2
241+
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
242+
243+
return c * _EARTH_RADIUS

alerts/geomodel_location.json

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,12 @@
44
"valid_duration_days": 1,
55
"radius_kilometres": 50
66
},
7-
"events": [
8-
{
7+
"events": {
98
"search_window": {
109
"minutes": 5
1110
},
12-
"lucene_query": "tags:auth0",
13-
"username_path": "details.username"
14-
}
15-
],
11+
"lucene_query": "tags:auth0"
12+
},
1613
"whitelist": {
1714
"users": [],
1815
"cidrs": []

0 commit comments

Comments
 (0)