Skip to content

Commit cbbed20

Browse files
authored
Merge pull request #88 from yuunnn/update_multi_search
Add support for specifying the starting page for "multi_page_search" …
2 parents f863961 + 373a2e4 commit cbbed20

3 files changed

Lines changed: 21 additions & 11 deletions

File tree

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,7 @@ The following are the interfaces and instructions provided by the SDK:
634634

635635
1.dork_search(dork, page=0, resource="host", facets=None)
636636
search the data of the specified page according to dork
637-
2.multi_page_search(dork, page=1, resource="host", facets=None)
637+
2.multi_page_search(dork, page=1, start_page=1, resource="host", facets=None)
638638
search multiple pages of data according to dork, optionally starting from the page given by start_page
639639
3.resources_info()
640640
get current user information

docs/README_CN.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -506,8 +506,8 @@ zm = ZoomEye(api_key="01234567-acbd-00000-1111-22222222222")
506506

507507
1.dork_search(dork, page=0, resource="host", facets=None)
508508
根据 dork 搜索指定页的数据
509-
2.multi_page_search(dork, page=1, resource="host", facets=None)
510-
根据 dork 搜索多页数据
509+
2.multi_page_search(dork, page=1, start_page=1, resource="host", facets=None)
510+
根据 dork 搜索多页数据,支持从指定的页数开始下载
511511
3.resources_info()
512512
获取当前用户的信息
513513
4.show_count()
@@ -557,7 +557,7 @@ soft********11180040.b***c.net ['126.***.***.40']
557557
{'product': [{'name': '', 'count': 28323128}, {'name': 'BusyBox telnetd', 'count': 10180912}, {'name': 'Linux telnetd', ......
558558
```
559559

560-
>`multi_page_search()` 同样也可以进行搜索,当需要获取大量数据时使用该函数,其中 `page` 字段表示获取多少页的数据;而 `dork_search()` 仅获取指定页的数据。
560+
>`multi_page_search()` 同样也可以进行搜索,当需要获取大量数据时使用该函数,其中 `page` 字段表示获取多少页的数据,`start_page` 字段表示从第几页开始获取;而 `dork_search()` 仅获取指定页的数据。
561561

562562
#### 5.数据筛选
563563
SDK 中提供了 `dork_filter()` 函数,我们可以更加方便对数据进行筛选,提取指定的数据字段,如下:

zoomeye/sdk.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@ def _check_header(self):
142142
else:
143143
headers = {}
144144
# add user agent
145-
headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
145+
headers[
146+
"User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
146147
return headers
147148

148149
def dork_search(self, dork, page=0, resource="host", facets=None):
@@ -178,8 +179,8 @@ def dork_search(self, dork, page=0, resource="host", facets=None):
178179

179180
return result
180181

181-
def multi_page_search(self, dork, page=1, resource="host",
182-
facets=None) -> list:
182+
def multi_page_search(self, dork, page=1, start_page=1, resource="host",
183+
facets=None) -> (list, int, str):
183184
"""
184185
mainly used to search dork data from zoomeye data.
185186
please see: https://www.zoomeye.org/doc#host-search and
@@ -189,6 +190,8 @@ def multi_page_search(self, dork, page=1, resource="host",
189190
dork to search
190191
:param page: int,
191192
specify the number of pages to return data, each page contains 20 data
193+
:param start_page: int,
194+
specify the page number to start searching from (1-based); pages start_page through page are requested
192195
:param resource: str,
193196
host search or web search
194197
:param facets: list or tuple
@@ -207,12 +210,20 @@ def multi_page_search(self, dork, page=1, resource="host",
207210

208211
dork_data = []
209212
all_data = []
210-
for i in range(page):
213+
is_search_done = "done"
214+
for i in range(start_page - 1, page):
215+
print("downloading contents from page{}".format(i+1))
211216
if isinstance(facets, (tuple, list)):
212217
facets = ','.join(facets)
213218

214219
params = {'query': dork, 'page': i + 1, 'facets': facets}
215-
result = self._request(search_api, params=params, headers=headers)
220+
try:
221+
result = self._request(search_api, params=params, headers=headers)
222+
except Exception as e:
223+
# return the processed data
224+
self.data_list = dork_data
225+
self.raw_data = all_data
226+
return dork_data, i, "search failed, the log as {}".format(e)
216227
if result and "matches" in result:
217228
self.total = result.get("total")
218229
all_data.append(result)
@@ -231,7 +242,7 @@ def multi_page_search(self, dork, page=1, resource="host",
231242
# i added it to a list for easy viewing of each piece of data
232243
self.raw_data = all_data
233244
# return processed data
234-
return dork_data
245+
return dork_data, page, is_search_done
235246

236247
def resources_info(self) -> dict:
237248
"""
@@ -357,7 +368,6 @@ def generate_dot(self, q, source=0, page=1):
357368
return True, "successful! saving in {}".format(os.getcwd())
358369

359370

360-
361371
def show_site_ip(data):
362372
"""
363373
show web search

0 commit comments

Comments
 (0)