# Performance of graph search

from Red Blob Games
28 Jul 2014

## 1  Measure performance per node not per graph#

The problem is that we don’t know the sizes of the graphs; is the performance per node reasonably consistent? Don’t know. I’d expect 100-1000 nodes per millisecond.

## 2  Python#

```def breadth_first_search(graph, start, goal):
frontier = collections.deque()
frontier.append(start)
came_from = {}
came_from[start] = None

while len(frontier) > 0:
current = frontier.popleft()

if current == goal:
break

for next in graph.neighbors(current):
if next not in came_from:
frontier.append(next)
came_from[next] = current

return came_from

g = SquareGrid(30, 15)
g.walls = DIAGRAM1_WALLS

import time
def time_it():
count = 100
t = time.time()
for i in range(count):
print(1000*(time.time() - t)/count, "ms")

time_it()
```

### 2.2 dijkstra’s algorithm

```def dijkstra_search(graph, start, goal):
frontier = PriorityQueue()
frontier.put(start, 0)
came_from = {}
cost_so_far = {}
came_from[start] = None
cost_so_far[start] = 0

while not frontier.empty():
current = frontier.get()

if current == goal:
break

for next in graph.neighbors(current):
new_cost = cost_so_far[current] + graph.cost(current, next)
if next not in cost_so_far or new_cost < cost_so_far[next]:
cost_so_far[next] = new_cost
priority = new_cost
frontier.put(next, priority)
came_from[next] = current

return came_from, cost_so_far

import time
def time_it():
count = 100
t = time.time()
for i in range(count):
dijkstra_search(diagram4, (1, 4), (7, 8))
print(1000*(time.time() - t)/count, "ms")

time_it()
```

## 3  C++#

I extracted this to performance.cpp so I could try it with various compiler optimization flags.

• Python 2ms
• clang++ without optimization 1ms
• clang++ with -O3 0.28ms
```#include <time.h>
#include "redblobgames/pathfinding/a-star/implementation.cpp"

template<typename Graph>
unordered_map<typename Graph::Location, typename Graph::Location>
breadth_first_search(Graph graph, typename Graph::Location start) {
typedef typename Graph::Location Location;
queue<Location> frontier;
frontier.push(start);

unordered_map<Location, Location> came_from;
came_from[start] = start;

while (!frontier.empty()) {
auto current = frontier.front();
frontier.pop();

for (auto next : graph.neighbors(current)) {
if (!came_from.count(next)) {
frontier.push(next);
came_from[next] = current;
}
}
}
return came_from;
}

int main() {
SquareGrid grid = make_diagram1();
int count = 1000;
clock_t t = clock();
for (int i = 0; i < count; i++) {
auto parents = breadth_first_search(grid, std::make_tuple(7, 8));
}
std::cout << "Avg time per graph search: " << (1000.0*(clock() - t)/count/CLOCKS_PER_SEC) << " ms" << std::endl;
}
```

### 3.2 breadth first search optimized

Instead of using a single queue that has to add/remove elements at both ends, use two queues. We add to one queue while removing from the other, then swap them.

```#include <time.h>
#include <array>
#include <vector>
#include <iostream>

using namespace std;

struct {int dx,dy;} DIRS[] = {{1, 0}, {0, -1}, {-1, 0}, {0, 1}};

struct SquareGrid {
typedef uint16_t Location;

int width, height;
vector<bool> walls;

SquareGrid(int width_, int height_)
: width(width_), height(height_) {
walls.resize(size());
}

int size() const { return 1 + width*height; }
Location to_id(int x, int y) const { return 1 + x + y*width; }
int to_x(Location id) const { return (id-1) % width; }
int to_y(Location id) const { return (id-1) / width; }
bool in_bounds(Location id) const { return 1 <= id && id <= size(); }
bool passable(Location id) const { return !walls[id]; }

void neighbors(Location id, vector<Location>& neighbors) const {
int x = to_x(id), y = to_y(id);
for (int i = 0; i < 4; i++) {
Location next = to_id(x + DIRS[i].dx, y + DIRS[i].dy);
if (in_bounds(next) && passable(next)) {
neighbors.push_back(next);
}
}
}
};

void add_rect(SquareGrid& grid, int x1, int y1, int x2, int y2) {
for (int x = x1; x < x2; ++x) {
for (int y = y1; y < y2; ++y) {
int id = grid.to_id(x, y);
grid.walls[id] = true;
}
}
}

SquareGrid make_diagram1() {
SquareGrid grid(30, 15);
return grid;
}

template<typename Graph>
vector<typename Graph::Location>
breadth_first_search(Graph graph, typename Graph::Location start) {
typedef typename Graph::Location Location;
vector<Location> frontier_curr;
vector<Location> frontier_next;
frontier_curr.push_back(start);

vector<Location> came_from;
vector<Location> neighbors;
came_from.resize(graph.size());
came_from[start] = start;

while(!frontier_curr.empty()) {
for (const auto& current : frontier_curr) {
neighbors.clear();
graph.neighbors(current, neighbors);
for (const auto& next : neighbors) {
if (came_from[next] == 0) {
frontier_next.push_back(next);
came_from[next] = current;
}
}
}
frontier_curr.clear();
swap(frontier_curr, frontier_next);
}
return came_from;
}

int main() {
SquareGrid grid = make_diagram1();
int count = 1000;
clock_t t = clock();
for (int i = 0; i < count; i++) {
auto parents = breadth_first_search(grid, grid.to_id(7, 8));
}
cout << "Avg time per graph search: " << (1000.0*(clock() - t)/count/CLOCKS_PER_SEC) << " ms\n";
}
```

When using the double-loop we get 0.315 ms

When using int id instead of tuple<int,int> and a vector<int> instead of unordered_map<tuple,tuple> we get 0.191 ms

It’d also be faster if neighbors() filled an array instead of allocating a new one, and if it were int ids instead of tuples –> 0.013661 ms

It’d likely be even faster if we don’t need the full array but just a path, because we can use an uninitialized came_from and only initialize a `visited` bitvector

```c++ -std=c++14 -I ~/Sites -O3 performance-2.cpp -o /tmp/a.out && time /tmp/a.out
instruments -t "Time Profiler" -D /tmp/trace.trace /tmp/a.out
```

Email me , or tweet @redblobgames, or comment: