Skip to content

Commit 2619ccf

Browse files
committed
move order_blocks functionality in sequence
1 parent 6c1f7aa commit 2619ccf

2 files changed

Lines changed: 37 additions & 13 deletions

File tree

blocksequence/cli.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ def main(ctx, source_host, source_db, source_user, source_pass, out):
5454
# add subcommands
5555
main.add_command(utils.node_weights)
5656
main.add_command(sequence.sequence)
57-
main.add_command(utils.order_blocks)
5857
main.add_command(utils.t_intersections)
5958
main.add_command(utils.start_points)
6059

blocksequence/sequence/commands.py

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import click
1010
import networkx as nx
11+
import numpy as np
1112
import pandas as pd
1213
import psycopg2
1314
from sqlalchemy.ext.automap import automap_base
@@ -104,24 +105,21 @@ def sequence_geo(src_url, dest_url, bf_tbl, parent_geo_uid, pid, weight_field, n
104105
g = nx.convert_matrix.from_pandas_edgelist(all_edges, 'start_node', 'end_node', True, nx.MultiGraph)
105106
logger.debug("MultiGraph with %s nodes and %s edges built for %s", len(g.nodes()), len(g.edges()), pid)
106107

107-
flat_edgelist = []
108-
109108
# ensure the graph is connected, otherwise it can't be made into a eulerian circuit
110109
is_connected = nx.is_connected(g)
111110
logger.debug("%s graph is fully connected: %s", pid, is_connected)
112111
if not is_connected:
113112
logger.error("Disconnected graph found for %s. Sequencing subgraphs.", pid)
114-
for comp in nx.connected_components(g):
115-
g_sub = g.subgraph(comp)
116-
flat_edgelist.extend(sequence_edges(g_sub, dest_db, parent_geo_uid, pid, weight_field, node_limit))
113+
all_edges = pd.concat([sequence_edges(g.subgraph(comp), dest_db, parent_geo_uid, pid, weight_field, node_limit) for comp in nx.connected_components(g)])
114+
# for comp in nx.connected_components(g):
115+
# g_sub = g.subgraph(comp)
116+
# edgelist.append(sequence_edges(g_sub, dest_db, parent_geo_uid, pid, weight_field, node_limit))
117117
else:
118-
flat_edgelist = sequence_edges(g, dest_db, parent_geo_uid, pid, weight_field, node_limit)
118+
all_edges = sequence_edges(g, dest_db, parent_geo_uid, pid, weight_field, node_limit)
119119

120-
# leverage pandas to write all the edge data to the database
121-
logger.debug("Generating dataframe for %s", pid)
122-
edge_sequence = pd.DataFrame.from_records(flat_edgelist)
123-
# don't waste time sorting - the DB can do that
124-
# edge_sequence.sort_values(by='sequence', inplace=True)
120+
all_edges['edge_order'] = all_edges.sort_values('seq').groupby('block', sort=False).cumcount()+1
121+
all_edges['block_order'] = all_edges.sort_values('seq').groupby('block', sort=False).ngroup()+1
122+
all_edges['chain_id'] = np.where(all_edges['eo'] == 1, 1, 0)
125123

126124
# write the edge list to the outputs db
127125
logger.info("Writing %s %s sequence results to database", parent_geo_uid, pid)
@@ -223,7 +221,13 @@ def sequence_edges(g, dest_db, parent_geo_uid, pid, weight_field, node_limit):
223221
# make sure the graph was actually calculated
224222
if shortest_distance == -1:
225223
logger.critical("No possible circuit found for %s %s using %s start nodes", parent_geo_uid, pid, node_limit)
226-
return []
224+
return pd.DataFrame()
225+
226+
# set the sequence on the graph
227+
for i, e in enumerate(nx.eulerian_circuit(g)):
228+
set_edge_sequence(g, e, i)
229+
230+
return nx.to_pandas_edgelist(g)
227231

228232
# find the circuit with the shortest distance
229233
graph_distance = sum(nx.get_edge_attributes(g, weight_field).values())
@@ -242,6 +246,27 @@ def sequence_edges(g, dest_db, parent_geo_uid, pid, weight_field, node_limit):
242246
logger.debug("sequence_edges end")
243247
return flat_edgelist
244248

249+
def set_edge_sequence(graph, edge, seq):
    """Set a sequence value on the given graph edge.

    A multigraph can hold several parallel edges between the same two nodes.
    Each call marks exactly one of them -- the first that does not yet carry
    a 'seq' attribute -- so repeated visits to the same node pair number the
    parallel edges one at a time.

    Args:
        graph: multigraph exposing ``get_edge_data(u, v)``, which returns a
            mapping of edge key -> attribute dict (or None when no edge exists).
        edge: sequence whose first two items are the end nodes of the edge.
        seq: value stored under the 'seq' attribute of the chosen edge.

    Returns:
        None. The graph's edge attributes are modified in place. Nothing is
        changed when no edge exists between the nodes or when every parallel
        edge already has a 'seq'.
    """
    # Fetch the keyed data of every parallel edge between the two end nodes.
    # Iterating the real keys (instead of range(number_of_edges)) avoids a
    # TypeError when edge keys are not the contiguous integers 0..n-1.
    keyed_edges = graph.get_edge_data(edge[0], edge[1])
    if keyed_edges is None:
        # no edge between these nodes, so there is nothing to mark
        return

    # iterate the edges, marking the first one we see without a sequence
    for data in keyed_edges.values():
        # if this one has already been seen, skip it
        if 'seq' in data:
            continue

        # set the sequence on this edge
        data['seq'] = seq

        # after marking an edge, bail to avoid putting the same sequence on more than one edge
        return
269+
245270
def get_shortest_paths_distances(graph, pairs, edge_weight_name):
246271
"""Compute the shortest distance between each pair of nodes in a graph.
247272

0 commit comments

Comments
 (0)