Skip to content

Commit 42605a8

Browse files
authored
Update graph_analysis.py
added visualization, removed 2_hops label in saved graphs
1 parent a6a1fd0 commit 42605a8

1 file changed

Lines changed: 214 additions & 2 deletions

File tree

GraphReasoning/graph_analysis.py

Lines changed: 214 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1794,12 +1794,12 @@ def sample_path_with_randomness(source, target, randomness_factor, num_random_wa
17941794
# Add nodes and edges from the subgraph to the Pyvis network
17951795
nt.from_nx(subgraph)
17961796

1797-
fname = f'{data_dir}/shortest_path_2hops_{time_part}_{source}_{target}.html'
1797+
fname = f'{data_dir}/shortest_path_{time_part}_{source}_{target}.html'
17981798
nt.show(fname)
17991799
if verbatim:
18001800
print(f"HTML visualization: {fname}")
18011801

1802-
graph_GraphML = f'shortestpath_2hops_{time_part}_{source}_{target}.graphml'
1802+
graph_GraphML = f'shortestpath_{time_part}_{source}_{target}.graphml'
18031803
save_graph_without_text(subgraph, data_dir=data_dir, graph_name=graph_GraphML)
18041804

18051805
if verbatim:
@@ -1811,3 +1811,215 @@ def sample_path_with_randomness(source, target, randomness_factor, num_random_wa
18111811
shortest_path_length = len(path) - 1 # As path length is number of edges
18121812

18131813
return path, subgraph, shortest_path_length, fname, graph_GraphML
1814+
1815+
##############################
1816+
# Graph analysis and trend visualization
1817+
##############################
1818+
1819+
import networkx as nx
1820+
import random
1821+
import matplotlib.pyplot as plt
1822+
from datetime import datetime
1823+
from pyvis.network import Network
1824+
from copy import deepcopy
1825+
import numpy as np
1826+
import community as community_louvain
1827+
from tqdm.notebook import tqdm
1828+
1829+
# Function to precompute basic properties for the original graph
1830+
def precompute_basic_properties(G):
    """Compute summary statistics for a graph.

    Args:
        G: Graph object exposing ``is_directed``, ``to_undirected``,
            ``number_of_nodes``, ``number_of_edges`` and ``degree``
            (presumably a NetworkX graph, since it is passed to ``nx``
            functions).

    Returns:
        dict with the keys ``"number_of_nodes"``, ``"number_of_edges"``,
        ``"density"``, ``"average_clustering"`` and ``"average_degree"``.
        For a graph without nodes both averages are reported as ``0.0``
        rather than dividing by zero.
    """
    print("Compute properties...")

    node_count = G.number_of_nodes()

    if node_count == 0:
        # Averages over zero nodes are undefined; report 0.0 instead of
        # letting the division below raise ZeroDivisionError.
        average_clustering = 0.0
        average_degree = 0.0
    else:
        # Clustering is evaluated on an undirected version of the graph.
        undirected_G = G.to_undirected() if G.is_directed() else G
        average_clustering = nx.average_clustering(undirected_G)
        average_degree = sum(degree for _, degree in G.degree()) / node_count

    return {
        "number_of_nodes": node_count,
        "number_of_edges": G.number_of_edges(),
        "density": nx.density(G),
        "average_clustering": average_clustering,
        "average_degree": average_degree,
    }
1846+
1847+
# Analyze basic properties function
1848+
def analyze_basic_properties(G, original_properties=None):
    """Compute summary statistics for a graph, optionally with per-node degrees.

    Args:
        G: Graph object exposing ``is_directed``, ``to_undirected``,
            ``number_of_nodes``, ``number_of_edges``, ``nodes`` and ``degree``
            (presumably a NetworkX graph, since it is passed to ``nx``
            functions).
        original_properties: Only tested for truthiness here; its contents
            are never read. When truthy, an ``"original_degrees"`` entry is
            added to the result.

    Returns:
        dict with the keys ``"number_of_nodes"``, ``"number_of_edges"``,
        ``"density"``, ``"average_clustering"`` and ``"average_degree"``,
        plus ``"original_degrees"`` (node -> degree) when
        ``original_properties`` is truthy. For a graph without nodes both
        averages are reported as ``0.0`` rather than dividing by zero.
    """
    node_count = G.number_of_nodes()
    # Built once and reused for the average and for the per-node entry.
    degree_by_node = dict(G.degree())

    if node_count == 0:
        # Averages over zero nodes are undefined; report 0.0 instead of
        # letting the division below raise ZeroDivisionError.
        average_clustering = 0.0
        average_degree = 0.0
    else:
        # Clustering is evaluated on an undirected version of the graph.
        undirected_G = G.to_undirected() if G.is_directed() else G
        average_clustering = nx.average_clustering(undirected_G)
        average_degree = sum(degree_by_node.values()) / node_count

    properties = {
        "number_of_nodes": node_count,
        "number_of_edges": G.number_of_edges(),
        "density": nx.density(G),
        "average_clustering": average_clustering,
        "average_degree": average_degree,
    }

    if original_properties:
        # NOTE(review): despite the key name, these degrees are read from G
        # itself, not from the graph that `original_properties` describes --
        # confirm whether that is intended.
        properties["original_degrees"] = {
            node: degree_by_node.get(node, 0) for node in G.nodes()
        }

    return properties
1867+
1868+
# Analyze communities function
1869+
def analyze_communities(G):
    """Group the nodes of a graph by detected community.

    A directed graph is converted to undirected first. The node-to-community
    assignment comes from ``community_louvain.best_partition``.

    Args:
        G: Graph object exposing ``is_directed`` and ``to_undirected``.

    Returns:
        dict mapping each community id to the list of nodes assigned to it,
        in the order the partition yields them.
    """
    graph = G.to_undirected() if G.is_directed() else G
    node_to_community = community_louvain.best_partition(graph)

    members_by_community = {}
    for member, community_id in node_to_community.items():
        members_by_community.setdefault(community_id, []).append(member)

    return members_by_community
1882+
1883+
# Function to perform spectral analysis
1884+
def spectral_analysis(G):
    """Return two spectral quantities of the graph Laplacian.

    A directed graph is converted to undirected first, so the Laplacian is
    symmetric and its eigenvalues are real.

    Args:
        G: Graph object exposing ``is_directed``, ``to_undirected`` and
            ``number_of_nodes``, accepted by ``nx.laplacian_matrix``.

    Returns:
        Tuple ``(fiedler_value, spectral_gap)``:
        ``fiedler_value`` is the second-smallest Laplacian eigenvalue and
        ``spectral_gap`` is the largest eigenvalue minus the second largest.
        Both are ``0`` when the graph has fewer than two nodes.
    """
    if G.is_directed():
        G = G.to_undirected()

    # With fewer than two nodes there is no second eigenvalue; return early
    # so the Laplacian is never built for such graphs.
    if G.number_of_nodes() < 2:
        return 0, 0

    laplacian = nx.laplacian_matrix(G).toarray()

    # eigvalsh is the symmetric solver: it returns real eigenvalues already
    # sorted in ascending order, whereas the general eigvals solver may
    # return complex values, which makes sorting unreliable.
    eigenvalues = np.linalg.eigvalsh(laplacian)

    fiedler_value = eigenvalues[1]  # Second smallest eigenvalue
    spectral_gap = eigenvalues[-1] - eigenvalues[-2]  # Largest - second largest

    return fiedler_value, spectral_gap
1896+
1897+
# Function to analyze the graph
1898+
def analyze_graph(G, original_properties=None):
    """Run the basic, community and spectral analyses on one graph.

    Args:
        G: Graph handed unchanged to ``analyze_basic_properties``,
            ``analyze_communities`` and ``spectral_analysis``.
        original_properties: Forwarded to ``analyze_basic_properties``.

    Returns:
        dict with the keys ``"basic_properties"``, ``"communities"``,
        ``"fiedler_value"`` and ``"spectral_gap"``.
    """
    # The three analyses run in this fixed order: basic, communities, spectral.
    report = {"basic_properties": analyze_basic_properties(G, original_properties)}
    report["communities"] = analyze_communities(G)

    fiedler, gap = spectral_analysis(G)
    report["fiedler_value"] = fiedler
    report["spectral_gap"] = gap

    return report
1911+
1912+
# Function to plot analysis trends
1913+
# Draws one subplot per metric against the x-axis values and saves the figure as an SVG
1914+
def plot_analysis_trends(analysis_results, num_waypoints_range, xlabel='Number of Random Waypoints',
                         include_avg_clustering=False):
    """Plot each analysed metric against the given x-axis values.

    One subplot per metric is drawn on a 3x3 grid, the figure is saved as
    ``analysis_{xlabel}.svg`` in the current working directory and then shown.

    Args:
        analysis_results: Sequence of dicts with a ``"basic_properties"``
            sub-dict plus ``"fiedler_value"`` and ``"spectral_gap"`` entries
            (the layout produced by ``analyze_graph``).
        num_waypoints_range: X-axis values, passed to ``plt.plot`` together
            with each metric series.
        xlabel: X-axis label for every subplot; also embedded in the output
            file name.
        include_avg_clustering: When true, the average clustering
            coefficient gets its own subplot.
    """
    def basic_series(key):
        # Collect one basic property across all results.
        return [entry["basic_properties"][key] for entry in analysis_results]

    def top_level_series(key):
        # Collect one top-level entry across all results.
        return [entry[key] for entry in analysis_results]

    # Every series is extracted up front, including clustering even when it
    # is not drawn. Each panel: (title, y-axis label, values, drawn?).
    panels = [
        ('Number of Nodes', 'Number of Nodes', basic_series("number_of_nodes"), True),
        ('Number of Edges', 'Number of Edges', basic_series("number_of_edges"), True),
        ('Density', 'Density', basic_series("density"), True),
        ('Average Clustering Coefficient', 'Average Clustering',
         basic_series("average_clustering"), include_avg_clustering),
        ('Average Degree', 'Average Degree', basic_series("average_degree"), True),
        ('Fiedler Value', 'Fiedler Value', top_level_series("fiedler_value"), True),
        ('Spectral Gap', 'Spectral Gap', top_level_series("spectral_gap"), True),
    ]

    plt.figure(figsize=(15, 9))

    slot = 0
    for title, ylabel, values, drawn in panels:
        if not drawn:
            continue
        # Only drawn panels consume a grid slot, so the grid has no holes.
        slot += 1
        plt.subplot(3, 3, slot)
        plt.plot(num_waypoints_range, values, marker='o')
        plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)

    plt.tight_layout()

    plt.savefig(f"analysis_{xlabel}.svg")

    plt.show()
1985+
1986+
"""
1987+
source= "silk"
1988+
target= "food"
1989+
1990+
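# Assuming `G`, `embedding_tokenizer`, `embedding_model`, `node_embeddings`, and `original_properties` (e.g. `original_properties = precompute_basic_properties(G)`) are defined; `source` and `target` are set above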
1991+
num_waypoints_range = [0, 1, 2, 5, 10, 20, 50]
1992+
1993+
randomness_factor = 0.3
1994+
analysis_results = []
1995+
1996+
for num_waypoints in tqdm(num_waypoints_range, desc="Analyzing Waypoints"):
1997+
path, path_graph, shortest_path_length, _, _ = heuristic_path_with_embeddings_with_randomization_waypoints(
1998+
G,
1999+
embedding_tokenizer,
2000+
embedding_model,
2001+
source,
2002+
target,
2003+
node_embeddings,
2004+
top_k=3,
2005+
second_hop=False,
2006+
data_dir='./tmp_discovery_100',
2007+
#save_files=False,
2008+
save_files=True,
2009+
2010+
verbatim=False,
2011+
randomness_factor=randomness_factor,
2012+
num_random_waypoints=num_waypoints
2013+
)
2014+
if path_graph is not None:
2015+
analysis_result = analyze_graph(path_graph, original_properties=original_properties,)
2016+
analysis_results.append(analysis_result)
2017+
2018+
#path_list_for_vis, path_list_for_vis_string=path_list=print_path_with_edges_as_list(G, path, keywords_separator='--')
2019+
#print (path_list_for_vis_string,'----------')
2020+
#print (path_list,'----------')
2021+
else:
2022+
print(f"No valid path found for {num_waypoints} waypoints.")
2023+
2024+
plot_analysis_trends(analysis_results, num_waypoints_range,xlabel='Number of Random Waypoints')
2025+
"""

0 commit comments

Comments
 (0)