Skip to content

Commit

Permalink
basic version with batch insert and mmap search
Browse files Browse the repository at this point in the history
  • Loading branch information
kisasexypantera94 committed Apr 22, 2024
0 parents commit 86e343f
Show file tree
Hide file tree
Showing 25 changed files with 1,140 additions and 0 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/c-cpp.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: C/C++ CI

on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:
build:

runs-on: ubuntu-latest

steps:
- name: Check out code
uses: actions/checkout@v3
- name: Pull submodules
run: git submodule update --init --recursive
- name: Build docker images
run: docker build -t local .
- name: Run build
run: docker run --rm local make
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
*DS_Store
.cache
.vscode/
build/
notes.txt
compile_commands.json
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "externals/sanitizers-cmake"]
path = externals/sanitizers-cmake
url = https://github.com/arsenm/sanitizers-cmake.git
45 changes: 45 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
FROM ubuntu:24.04 as base

ENV TZ=Asia/Yerevan
ENV DEBIAN_FRONTEND=noninteractive

# disable certificate check (for kitware)
RUN touch /etc/apt/apt.conf.d/99verify-peer.conf && \
echo >> /etc/apt/apt.conf.d/99verify-peer.conf "Acquire { https::Verify-Peer false }"

# add kitware repo so that fresh cmake could be installed
RUN apt -y update
RUN apt install -y software-properties-common gpg wget flex bison pkg-config
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \
apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
RUN apt -y update
RUN apt install --reinstall ca-certificates
RUN apt install kitware-archive-keyring
RUN apt -y purge --auto-remove cmake && apt -y install cmake

# install essential tools
RUN apt -y update && \
apt -y install clang make cmake python3-pip git && \
apt -y remove gcc

RUN apt install -y libopenblas-dev libomp-dev
# Build and install FAISS v1.8.0 from source (GPU and Python bindings disabled).
# `$(nproc)` is already a command substitution; wrapping it in backticks would
# try to execute the CPU count as a command and leave `-j` without a job limit.
RUN wget https://github.com/facebookresearch/faiss/archive/refs/tags/v1.8.0.tar.gz && \
    tar xf v1.8.0.tar.gz && \
    cd faiss-1.8.0 && mkdir build && cd build && \
    cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=0 -DFAISS_OPT_LEVEL=generic -DFAISS_ENABLE_PYTHON=OFF -DFAISS_ENABLE_GPU=OFF .. && \
    make -j"$(nproc)" faiss && make install

# Conan
RUN rm /usr/lib/python3.12/EXTERNALLY-MANAGED && \
pip3 install --force-reinstall --ignore-installed conan && \
conan profile detect && \
sed -i 's/compiler.cppstd=gnu17/compiler.cppstd=23/g' ~/.conan2/profiles/default


WORKDIR /workdir


# dev
FROM base as dev
COPY . .
ENTRYPOINT [ "/usr/bin/bash", "-l", "-c" ]
42 changes: 42 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
.DEFAULT_GOAL := debug

current_dir=$(CURDIR)

conan-install-debug:
conan install . --build=missing --settings=build_type=Debug

conan-install-release:
conan install . --build=missing --settings=build_type=Release

conan-install-reldbg:
conan install . --build=missing --settings=build_type=RelWithDebInfo

configure-release: conan-install-release
cmake -Hsrc -Bbuild/Release \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_TOOLCHAIN_FILE=$(current_dir)/build/Release/generators/conan_toolchain.cmake

configure-reldbg: conan-install-reldbg
cmake -Hsrc -Bbuild/RelWithDebInfo \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_TOOLCHAIN_FILE=$(current_dir)/build/RelWithDebInfo/generators/conan_toolchain.cmake

configure-debug: conan-install-debug
cmake -Hsrc -Bbuild/Debug \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_TOOLCHAIN_FILE=$(current_dir)/build/Debug/generators/conan_toolchain.cmake

# Build the Release tree with one job per available CPU.
# `$$(nproc)` is passed to the shell as `$(nproc)`; a single `$` would be
# expanded by Make as an (undefined, empty) variable and yield a bare `-j`.
release: configure-release
	$(MAKE) -j$$(nproc) -C build/Release

# Build the RelWithDebInfo tree with one job per available CPU.
# `$$(nproc)` is passed to the shell as `$(nproc)`; a single `$` would be
# expanded by Make as an (undefined, empty) variable and yield a bare `-j`.
reldbg: configure-reldbg
	$(MAKE) -j$$(nproc) -C build/RelWithDebInfo

# Build the Debug tree with one job per available CPU.
# `$$(nproc)` is passed to the shell as `$(nproc)`; a single `$` would be
# expanded by Make as an (undefined, empty) variable and yield a bare `-j`.
debug: configure-debug
	$(MAKE) -j$$(nproc) -C build/Debug
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Urukrama
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
![example workflow](https://github.com/kisasexypantera94/urukrama/actions/workflows/c-cpp.yml/badge.svg)

Urukrama is a simple and efficient implementation of the DiskANN algorithm, developed for fun and self-education.

## Setup
```bash
git submodule update --init --recursive
docker compose up -d
```
Attach to the container and run:
```
make release
```

## References
* **DiskANN: Fast Accurate Billion-point Nearest Neighbor Search on a Single Node.**
Suhas Jayaram Subramanya, Rohan Kadekodi, Ravishankar Krishnaswamy, and Harsha Vardhan Simhadri, 2019.
* **ParlayANN: Scalable and Deterministic Parallel Graph-Based Approximate Nearest Neighbor Search Algorithms**.
Magdalen Dobson Manohar, Zheqi Shen, Guy E. Blelloch, Laxman Dhulipala, Yan Gu, Harsha Vardhan Simhadri, Yihan Sun, 2023.
21 changes: 21 additions & 0 deletions conanfile.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
[requires]
eigen/3.4.0
tsl-robin-map/1.2.2
tsl-hopscotch-map/2.3.1
tsl-sparse-map/0.6.2
boost/1.84.0
spdlog/1.13.0
onetbb/2021.12.0
mio/cci.20230303

[generators]
CMakeDeps
CMakeToolchain

[layout]
cmake_layout

[options]
boost*:header_only=True
spdlog*:header_only=True
spdlog*:use_std_fmt=True
10 changes: 10 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
version: '3.4'
services:
dev:
build:
context: .
target: dev
network_mode: host
stdin_open: true
tty: true
entrypoint: ["bash"]
1 change: 1 addition & 0 deletions externals/sanitizers-cmake
Submodule sanitizers-cmake added at 3f0542
37 changes: 37 additions & 0 deletions src/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
BasedOnStyle: Google
IndentWidth: 4
AccessModifierOffset: -4
ColumnLimit: 120

AlignEscapedNewlines: Right
AlignTrailingComments: false

AllowAllArgumentsOnNextLine: false
AllowShortFunctionsOnASingleLine: true

AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true

BinPackArguments: false
BinPackParameters: false

BreakBeforeBraces: WebKit
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeComma
BreakInheritanceList: BeforeComma


DerivePointerAlignment: false
PointerAlignment: Left

SortIncludes: true
IncludeBlocks: Preserve

SpaceBeforeInheritanceColon: false
SpaceBeforeCtorInitializerColon: false
SpaceBeforeRangeBasedForLoopColon: false
SpacesInContainerLiterals: false

Standard: Latest

MaxEmptyLinesToKeep: 2
10 changes: 10 additions & 0 deletions src/.clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Checks: "
bugprone-*,
cppcoreguidelines-*,
clang-analyzer-*,
-bugprone-easily-swappable-parameters,
-cppcoreguidelines-avoid-magic-numbers,
"
WarningsAsErrors: ''
HeaderFilterRegex: '.'
FormatStyle: none
55 changes: 55 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
cmake_minimum_required(VERSION 3.28.3)

project(urukrama)

set(CMAKE_CXX_STANDARD 23)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")

set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/../externals/sanitizers-cmake/cmake" ${CMAKE_MODULE_PATH})
# set(CMAKE_CXX_CLANG_TIDY "clang-tidy")

# Conan
find_package(Eigen3 REQUIRED)
find_package(tsl-robin-map REQUIRED)
find_package(Boost REQUIRED)
find_package(spdlog REQUIRED)
find_package(TBB REQUIRED)

# apt
find_package(BLAS REQUIRED)
find_package(OpenMP REQUIRED)

# submodules
find_package(Sanitizers REQUIRED)

add_executable(urukrama
main.cpp
utils.cpp
in_memory_graph.cpp
on_disk_graph.cpp
faiss.cpp
)

target_link_libraries(urukrama
spdlog::spdlog_header_only
Eigen3::Eigen
tsl::robin_map
boost::boost
onetbb::onetbb
faiss
OpenMP::OpenMP_CXX
${BLAS_LIBRARIES}
)

target_link_directories(urukrama
PUBLIC
${CMAKE_LIBRARY_PATH}
)

target_include_directories(urukrama
PUBLIC
${CMAKE_INCLUDE_PATH}
)

add_sanitizers(urukrama)
42 changes: 42 additions & 0 deletions src/bounded_sorted_vector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#pragma once

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

// A vector of (key, value) pairs kept in ascending order that never holds
// more than `limit` elements: once full, inserting a new pair evicts the
// largest one (or is ignored when the new key is larger than every stored key).
template <typename K, typename V>
class BoundedSortedVector {
public:
    // `limit` is the maximum number of pairs retained.
    BoundedSortedVector(size_t limit): m_limit(limit) {}

public:
    // Inserts (key, val) at its sorted position, then drops the largest pair
    // if the capacity limit would be exceeded.
    void emplace(const K& key, const V& val)
    {
        // Nothing can ever be stored; also keeps `back()` below off an empty vector.
        if (m_limit == 0) {
            return;
        }

        // Fast path: the container is full and the new key sorts after the current
        // maximum, so it would be evicted right away. (`>=` matters here: the size
        // never exceeds the limit, so a strict `>` would never take this path.)
        if (m_data.size() >= m_limit and m_data.back().first < key) {
            return;
        }

        auto element = std::make_pair(key, val);
        const auto position = std::lower_bound(m_data.begin(), m_data.end(), element);
        m_data.insert(position, std::move(element));

        if (m_data.size() > m_limit) {
            m_data.pop_back();
        }
    }

    auto begin() { return m_data.begin(); }
    auto end() { return m_data.end(); }

    // Pre-allocates storage for `num` pairs without changing the limit.
    void reserve(size_t num) { m_data.reserve(num); }

    // Sets the capacity limit to `n`, discarding the largest pairs when more than
    // `n` are stored. Growing the limit does not add elements: padding with
    // default-constructed pairs would break the sorted order.
    void resize(size_t n)
    {
        m_limit = n;
        if (m_data.size() > m_limit) {
            m_data.erase(m_data.begin() + static_cast<std::ptrdiff_t>(m_limit), m_data.end());
        }
    }

    // Hands the underlying storage over to the caller (rvalue objects only).
    operator std::vector<std::pair<K, V>>() && { return std::move(m_data); }

private:
    size_t m_limit;
    std::vector<std::pair<K, V>> m_data;
};
33 changes: 33 additions & 0 deletions src/distance.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#pragma once

#include <concepts>
#include <ranges>
#include <span>

namespace urukrama {

// A distance policy `D` must expose a scalar type `D::Type` and a static
// `D::Compute` that takes two spans of `const D::Type` and returns exactly `D::Type`.
template <typename D>
concept CDistance = requires(std::span<const typename D::Type> lhs) {
    typename D::Type;

    requires std::same_as<decltype((D::Compute(lhs, lhs))), typename D::Type>;
};

// Straightforward squared Euclidean (L2) distance with no SIMD or blocking.
template <typename T>
struct NaiveL2 {
    using Type = T;

    // Returns the sum of squared element-wise differences of `a` and `b`.
    // Only the common prefix is compared when the spans differ in length.
    static T Compute(std::span<const T> a, std::span<const T> b)
    {
        const std::size_t count = a.size() < b.size() ? a.size() : b.size();

        T total = 0;
        for (std::size_t i = 0; i < count; ++i) {
            const T delta = a[i] - b[i];
            total += delta * delta;
        }

        return total;
    }
};

} // namespace urukrama
Loading

0 comments on commit 86e343f

Please sign in to comment.