Skip to content

Commit 131a323

Browse files
committed
Merge branch 'master' of github.com:hatnote/wikimon
2 parents e023a49 + 3883f50 commit 131a323

4 files changed

Lines changed: 91 additions & 47 deletions

File tree

README.md

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -43,32 +43,52 @@ optional arguments:
4343
Here are a couple example messages, as broadcast over WebSocket:
4444

4545
```json
46-
47-
{"is_minor": false,
48-
"page_title": "Template:Citation needed/testcases",
49-
"url": "http://en.wikipedia.org/w/index.php?diff=553804313&oldid=479472901",
50-
"is_unpatrolled": false,
51-
"is_bot": false,
52-
"is_new": false,
53-
"summary": null,
54-
"flags": null,
55-
"user": "98.172.160.184",
56-
"is_anon": true,
57-
"ns": "Template",
58-
"change_size": "+42"}
59-
60-
{"is_minor": true,
61-
"page_title": "Generalized anxiety disorder",
62-
"url": "http://en.wikipedia.org/w/index.php?diff=553804315&oldid=553370901",
63-
"is_unpatrolled": false,
64-
"is_bot": false,
65-
"is_new": false,
66-
"summary": "minor editing in sentences.",
67-
"flags": "M",
68-
"user": "BriannaMaxim",
69-
"is_anon": false,
70-
"ns": "Main",
71-
"change_size": "+1"}
46+
{
47+
"action": "edit",
48+
"change_size": 19,
49+
"flags": "M",
50+
"hashtags": [],
51+
"is_anon": false,
52+
"is_bot": false,
53+
"is_minor": true,
54+
"is_new": false,
55+
"is_unpatrolled": false,
56+
"mentions": [],
57+
"ns": "User talk",
58+
"page_title": "User talk:Manxruler",
59+
"parent_rev_id": "775894803",
60+
"rev_id": "775894650",
61+
"summary": "/* The battle of Kristiansand (1940) */",
62+
"url": "https://en.wikipedia.org/w/index.php?diff=775894803&oldid=775894650",
63+
"user": "Carsten R D"
64+
}
65+
66+
{
67+
"action": "edit",
68+
"change_size": -12,
69+
"flags": null,
70+
"geo_ip": {
71+
"city": "Salisbury",
72+
"country_name": "United States",
73+
"latitude": 38.3761,
74+
"longitude": -75.6086,
75+
"region_name": "Maryland"
76+
},
77+
"hashtags": [],
78+
"is_anon": true,
79+
"is_bot": false,
80+
"is_minor": false,
81+
"is_new": false,
82+
"is_unpatrolled": false,
83+
"mentions": [],
84+
"ns": "Main",
85+
"page_title": "Evanescence (Evanescence album)",
86+
"parent_rev_id": "775894800",
87+
"rev_id": "774995266",
88+
"summary": "/* Credits and personnel */ \"Personnel\" is sufficient",
89+
"url": "https://en.wikipedia.org/w/index.php?diff=775894800&oldid=774995266",
90+
"user": "71.200.123.192"
91+
}
7292
```
7393

7494
As you can see, the set of keys sent is almost always the same; in the examples above, only the anonymous edit carries the extra `geo_ip` key. Note that the
@@ -77,16 +97,7 @@ As you can see, the set of keys sent is always the same. Note that the
7797

7898
## Geolocation
7999

80-
Geolocation is currently done through a local FreeGeoIP
81-
instance. FreeGeoIP requires Go and several Go libraries. It also
82-
requires memcached to be running on port 11211.
83-
84-
The command used to run FreeGeoIP at the moment:
85-
86-
```
87-
GOPATH=/home/hatnote/gopkg/ GOROOT=/home/hatnote/go nohup /home/hatnote/go/bin/go run freegeoip.go &
88-
```
89-
100+
Geolocation is done in-process, using MaxMind's free dataset. See the GeoDB directory for more info.
90101

91102
## See also
92103

requirements.txt

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
-e git+https://github.com/mahmoud/wapiti.git#egg=wapiti
2-
3-
Twisted>=13.1.0
41
autobahn==0.5.14
2+
py==1.4.26
53
pytest==2.6.1
6-
python-geoip
7-
python-geoip-geolite2
4+
python-geoip==1.2
5+
python-geoip-geolite2==2014.207
6+
Twisted==14.0.2
7+
-e git+https://github.com/mahmoud/wapiti.git@5dc462b1e0d2290c1841980c94309efe4c27b75a#egg=wapiti-master
8+
zope.interface==4.1.2

setup.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
A WebSocket-oriented monitor for streaming live changes to
88
Wikipedia. (also, wikimon, wikital monsters)
99
10-
:copyright: (c) 2013 by Mahmoud Hashemi and Stephen LaPorte
10+
:copyright: (c) 2013-2015 by Mahmoud Hashemi and Stephen LaPorte
1111
:license: GPLv3, see LICENSE for more details.
1212
1313
"""
@@ -17,7 +17,7 @@
1717

1818

1919
__author__ = 'Mahmoud Hashemi and Stephen LaPorte'
20-
__version__ = '0.5.2'
20+
__version__ = '0.6.3dev'
2121
__contact__ = 'mahmoudrhashemi@gmail.com'
2222
__url__ = 'https://github.com/hatnote/wikimon'
2323
__license__ = 'GPLv3'
@@ -44,7 +44,11 @@
4444
packages=['wikimon'],
4545
include_package_data=True,
4646
zip_safe=False,
47-
install_requires=['wapiti', 'Twisted==13.0.0', 'autobahn==0.5.14'],
47+
install_requires=['wapiti',
48+
'Twisted==13.0.0',
49+
'autobahn==0.5.14',
50+
'python-geoip==1.2',
51+
'python-geoip-geolite2==2014.207'],
4852
license=__license__,
4953
platforms='any',
5054
classifiers=[

wikimon/parsers.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
HASHTAG_RE = re.compile("(?:^|\s)[##]{1}(\w+)", re.UNICODE)
1717
MENTION_RE = re.compile("(?:^|\s)[@ @]{1}([^\s#<>[\]|{}]+)", re.UNICODE)
1818

19+
_SECTION_TITLE_RE = re.compile("\/\*\s*(?P<section_title>.+)\s*\*\/"
20+
"(?P<real_summary>.*)", re.UNICODE)
21+
1922
NON_MAIN_NS = ['Talk',
2023
'User',
2124
'User talk',
@@ -89,6 +92,28 @@ def parse_revs_from_url(url):
8992
raise ValueError('unparsable url: %r' % (url,))
9093

9194

95+
def parse_section_title(summary):
    """Split an edit summary into its section title and remaining text.

    When an editor uses the edit link next to a section, MediaWiki
    generates a summary that begins with ``/* Section name */``. Only
    that leading marker is recognized here (via ``_SECTION_TITLE_RE``);
    no attempt is made to detect summaries that were produced some
    other way or rewritten by hand.

    Returns a ``(section_title, real_summary)`` tuple, each stripped of
    surrounding whitespace. When the summary does not start with a
    section marker, the section title is the empty string and the
    whole stripped summary is returned as the real summary.
    """
    found = _SECTION_TITLE_RE.match(summary)
    if found is None:
        return '', summary.strip()
    title, remainder = found.group('section_title', 'real_summary')
    return title.strip(), remainder.strip()
115+
116+
92117
def parse_irc_message(message, ns_map=DEFAULT_NS_MAP):
93118
ret = PARSE_EDIT_RE.match(message)
94119
msg_dict = {'is_new': False,
@@ -140,10 +165,13 @@ def parse_irc_message(message, ns_map=DEFAULT_NS_MAP):
140165
msg_dict.setdefault('user', None)
141166
msg_dict['is_anon'] = is_ip(msg_dict['user'])
142167

143-
if msg_dict['summary']:
144-
msg_dict['hashtags'] = HASHTAG_RE.findall(msg_dict['summary'])
145-
msg_dict['mentions'] = MENTION_RE.findall(msg_dict['summary'])
168+
summary = msg_dict['summary']
169+
if summary:
170+
msg_dict['section'], msg_dict['parsed_summary'] = parse_section_title(summary)
171+
msg_dict['hashtags'] = HASHTAG_RE.findall(summary)
172+
msg_dict['mentions'] = MENTION_RE.findall(summary)
146173
else:
174+
msg_dict['section'], msg_dict['parsed_summary'] = '', summary
147175
msg_dict['hashtags'] = []
148176
msg_dict['mentions'] = []
149177

0 commit comments

Comments
 (0)