llmcodegen/src/llm_codegen/dependency_sorter.py

115 lines
3.3 KiB
Python

"""
Module for dependency sorting and cycle detection.
Provides functions to perform topological sort and detect cycles in a dependency graph.
"""
from collections import defaultdict, deque
def topological_sort(dependencies):
    """
    Perform topological sort on a dependency graph using Kahn's algorithm.

    Edges run from each key to the entries of its dependency collection, so a
    node is emitted only after every node that lists it as a dependency has
    been emitted (dependents first, their dependencies afterwards).

    Args:
        dependencies (dict): A dictionary where keys are nodes (e.g., file paths)
            and values are collections of dependencies. Lists, tuples and sets
            are all accepted. Nodes that only appear as a dependency (and never
            as a key) are still included in the result.
            Example: {"src/llm_codegen/core.py": ["src/llm_codegen/utils.py", "src/llm_codegen/models.py"], ...}

    Returns:
        list: A list of nodes in topological order.

    Raises:
        ValueError: If a cycle is detected in the graph, with details of the cycle.
    """
    graph = {}
    in_degree = {}

    # Build the adjacency lists and count incoming edges.
    for node, deps in dependencies.items():
        # list() instead of slicing so that non-sliceable collections such as
        # sets work; the caller's collection is never mutated.
        edges = list(deps)
        graph[node] = edges
        for dep in edges:
            in_degree[dep] = in_degree.get(dep, 0) + 1
        # Make sure nodes without incoming edges are tracked as well.
        in_degree.setdefault(node, 0)

    # Start from the nodes that nothing depends on.
    queue = deque(node for node, degree in in_degree.items() if degree == 0)
    sorted_nodes = []
    while queue:
        node = queue.popleft()
        sorted_nodes.append(node)
        for neighbor in graph.get(node, ()):
            in_degree[neighbor] -= 1
            if in_degree[neighbor] == 0:
                queue.append(neighbor)

    # Any node left unprocessed still has an incoming edge, i.e. sits on or
    # behind a cycle.
    if len(sorted_nodes) != len(in_degree):
        # Pass the normalised graph (lists only) so the helper sees the same
        # edges regardless of the collection type the caller used.
        cycle = _detect_cycle_dfs(graph)
        raise ValueError(f"Cycle detected in dependency graph: {cycle}")
    return sorted_nodes
def _detect_cycle_dfs(dependencies):
"""
Internal helper function to detect a cycle in a dependency graph using DFS.
Args:
dependencies (dict): Same as topological_sort.
Returns:
list: A list of nodes forming a cycle if found, else an empty list.
"""
graph = defaultdict(list)
for node, deps in dependencies.items():
graph[node] = deps[:]
visited = set()
rec_stack = set()
cycle = []
def dfs(node, path):
nonlocal cycle
visited.add(node)
rec_stack.add(node)
path.append(node)
for neighbor in graph.get(node, []):
if neighbor not in visited:
if dfs(neighbor, path):
return True
elif neighbor in rec_stack:
# Cycle detected, extract from path
start_index = path.index(neighbor)
cycle = path[start_index:] + [neighbor]
return True
rec_stack.remove(node)
path.pop()
return False
for node in graph:
if node not in visited:
if dfs(node, []):
return cycle
return []
def detect_cycles(dependencies):
    """
    Report whether a dependency graph contains a cycle.

    Args:
        dependencies (dict): Same as topological_sort.

    Returns:
        tuple: (has_cycle, cycle_nodes). has_cycle is True when a cycle exists,
            in which case cycle_nodes is the list returned by the DFS helper;
            otherwise the result is (False, []).
    """
    found = _detect_cycle_dfs(dependencies)
    # A non-empty list from the helper means a cycle was found.
    return (True, found) if found else (False, [])