-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfilter_mentions.py
More file actions
41 lines (29 loc) · 797 Bytes
/
filter_mentions.py
File metadata and controls
41 lines (29 loc) · 797 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""
Filter out trash mentions: copy every line of the input file that passes check_mention to the output file.
"""
import sys
import re
# The script needs at least two positional arguments. Fail with a usage
# message (non-zero exit status) instead of a bare IndexError when one is
# missing; any extra arguments are ignored.
if len(sys.argv) < 3:
    sys.exit('usage: {} INPUT_FILE OUTPUT_FILE'.format(sys.argv[0]))
input_filename = sys.argv[1]   # file read line by line; each line is one mention
output_filename = sys.argv[2]  # file that receives the accepted lines
# Compiled once at import time. A mention may consist only of whitespace,
# digits, word characters (letters, digits, underscore), hyphens and commas.
_ALLOWED_CHARS_RE = re.compile(r'^[\s\d\w\-\,]*$')


def check_mention(mention: str, max_spaces: int = 2, max_length: int = 50) -> bool:
    """Return True if *mention* passes all trash-filter heuristics.

    The string is judged exactly as passed in: nothing is stripped, so a
    trailing newline (if present) counts toward both the length and the
    "other characters" tally.

    A mention is rejected when any of the following holds:

    * it contains more than ``max_spaces`` space characters;
    * it is longer than ``max_length`` characters;
    * it has more digits than letters;
    * it has more characters that are neither digits nor letters than it
      has letters;
    * it contains a character other than whitespace, word characters,
      ``-`` or ``,``.

    Args:
        mention: The candidate text to evaluate.
        max_spaces: Largest number of ``' '`` characters allowed.
        max_length: Largest total length (in characters) allowed.

    Returns:
        True when the mention should be kept, False when it is trash.
    """
    # Only literal spaces are counted (not tabs or newlines), so the default
    # limit of 2 admits at most three space-separated words.
    if mention.count(' ') > max_spaces:
        return False

    length = len(mention)
    if length > max_length:
        return False

    digits = sum(c.isdigit() for c in mention)
    letters = sum(c.isalpha() for c in mention)
    # Everything that is neither a digit nor a letter: spaces, punctuation,
    # underscores, newlines, ...
    others = length - digits - letters
    if digits > letters or others > letters:
        return False

    return _ALLOWED_CHARS_RE.match(mention) is not None
# Stream the input one line at a time and copy through only the lines that
# check_mention accepts. Lines are written back exactly as they were read
# (nothing is stripped). The input is opened first, so a missing input file
# fails before the output file is created or truncated.
with open(input_filename) as inp, open(output_filename, 'w') as out:
    out.writelines(line for line in inp if check_mention(line))