88
99import click
1010import networkx as nx
11+ import numpy as np
1112import pandas as pd
1213import psycopg2
1314from sqlalchemy .ext .automap import automap_base
@@ -104,24 +105,21 @@ def sequence_geo(src_url, dest_url, bf_tbl, parent_geo_uid, pid, weight_field, n
104105 g = nx .convert_matrix .from_pandas_edgelist (all_edges , 'start_node' , 'end_node' , True , nx .MultiGraph )
105106 logger .debug ("MultiGraph with %s nodes and %s edges built for %s" , len (g .nodes ()), len (g .edges ()), pid )
106107
107- flat_edgelist = []
108-
109108 # ensure the graph is connected, otherwise it can't be made into a eulerian circuit
110109 is_connected = nx .is_connected (g )
111110 logger .debug ("%s graph is fully connected: %s" , pid , is_connected )
112111 if not is_connected :
113112 logger .error ("Disconnected graph found for %s. Sequencing subgraphs." , pid )
114- for comp in nx .connected_components (g ):
115- g_sub = g .subgraph (comp )
116- flat_edgelist .extend (sequence_edges (g_sub , dest_db , parent_geo_uid , pid , weight_field , node_limit ))
113+ all_edges = pd .concat ([sequence_edges (g .subgraph (comp ), dest_db , parent_geo_uid , pid , weight_field , node_limit ) for comp in nx .connected_components (g )])
114+ # for comp in nx.connected_components(g):
115+ # g_sub = g.subgraph(comp)
116+ # edgelist.append(sequence_edges(g_sub, dest_db, parent_geo_uid, pid, weight_field, node_limit))
117117 else :
118- flat_edgelist = sequence_edges (g , dest_db , parent_geo_uid , pid , weight_field , node_limit )
118+ all_edges = sequence_edges (g , dest_db , parent_geo_uid , pid , weight_field , node_limit )
119119
120- # leverage pandas to write all the edge data to the database
121- logger .debug ("Generating dataframe for %s" , pid )
122- edge_sequence = pd .DataFrame .from_records (flat_edgelist )
123- # don't waste time sorting - the DB can do that
124- # edge_sequence.sort_values(by='sequence', inplace=True)
120+ all_edges ['edge_order' ] = all_edges .sort_values ('seq' ).groupby ('block' , sort = False ).cumcount ()+ 1
121+ all_edges ['block_order' ] = all_edges .sort_values ('seq' ).groupby ('block' , sort = False ).ngroup ()+ 1
122+ all_edges ['chain_id' ] = np .where (all_edges ['eo' ] == 1 , 1 , 0 )
125123
126124 # write the edge list to the outputs db
127125 logger .info ("Writing %s %s sequence results to database" , parent_geo_uid , pid )
@@ -223,7 +221,13 @@ def sequence_edges(g, dest_db, parent_geo_uid, pid, weight_field, node_limit):
223221 # make sure the graph was actually calculated
224222 if shortest_distance == - 1 :
225223 logger .critical ("No possible circuit found for %s %s using %s start nodes" , parent_geo_uid , pid , node_limit )
226- return []
224+ return pd .DataFrame ()
225+
226+ # set the sequence on the graph
227+ for i , e in enumerate (nx .eulerian_circuit (g )):
228+ set_edge_sequence (g , e , i )
229+
230+ return nx .to_pandas_edgelist (g )
227231
228232 # find the circuit with the shortest distance
229233 graph_distance = sum (nx .get_edge_attributes (g , weight_field ).values ())
@@ -242,6 +246,27 @@ def sequence_edges(g, dest_db, parent_geo_uid, pid, weight_field, node_limit):
242246 logger .debug ("sequence_edges end" )
243247 return flat_edgelist
244248
def set_edge_sequence(graph, edge, seq):
    """Set a sequence value on the first unsequenced edge between two nodes.

    Several parallel edges may join the same pair of nodes. Each call marks
    at most one of them, so repeated calls for the same node pair hand out
    one sequence value per parallel edge.

    Args:
        graph: multigraph-like object where ``graph[u][v]`` maps every edge
            key between ``u`` and ``v`` to that edge's attribute dict.
        edge: sequence whose first two items are the end nodes of the edge.
        seq: value stored under the ``'seq'`` attribute of the chosen edge.

    Returns:
        None. The edge attribute dict is updated in place. Nothing happens
        when the nodes share no edge or every parallel edge already carries
        a ``'seq'`` value.
    """
    u, v = edge[0], edge[1]

    try:
        parallel_edges = graph[u][v]
    except KeyError:
        # no edge between these nodes, so there is nothing to mark
        return

    # Walk the edges that really exist instead of assuming their keys are
    # 0..n-1; keys can be sparse or non-integer once edges have been added
    # with explicit keys or removed.
    for data in parallel_edges.values():
        # if this one has already been seen, skip it
        if 'seq' in data:
            continue

        # mark this edge, then bail to avoid putting the same sequence on
        # more than one edge
        data['seq'] = seq
        return
269+
245270def get_shortest_paths_distances (graph , pairs , edge_weight_name ):
246271 """Compute the shortest distance between each pair of nodes in a graph.
247272
0 commit comments