-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
basic version with batch insert and mmap search
- Loading branch information
0 parents
commit 86e343f
Showing
25 changed files
with
1,140 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
name: C/C++ CI | ||
|
||
on: | ||
push: | ||
branches: [ "main" ] | ||
pull_request: | ||
branches: [ "main" ] | ||
|
||
jobs: | ||
build: | ||
|
||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- name: Check out code | ||
uses: actions/checkout@v3 | ||
- name: Pull submodules | ||
run: git submodule update --init --recursive | ||
- name: Build docker images | ||
run: docker build -t local . | ||
- name: Run build | ||
run: docker run --rm local make |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
*DS_Store | ||
.cache | ||
.vscode/ | ||
build/ | ||
notes.txt | ||
compile_commands.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[submodule "externals/sanitizers-cmake"] | ||
path = externals/sanitizers-cmake | ||
url = https://github.com/arsenm/sanitizers-cmake.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
FROM ubuntu:24.04 as base | ||
|
||
ENV TZ=Asia/Yerevan | ||
ENV DEBIAN_FRONTEND=noninteractive | ||
|
||
# disable certificate check (for kitware) | ||
RUN touch /etc/apt/apt.conf.d/99verify-peer.conf && \ | ||
echo >> /etc/apt/apt.conf.d/99verify-peer.conf "Acquire { https::Verify-Peer false }" | ||
|
||
# add kitware repo so that fresh cmake could be installed | ||
RUN apt -y update | ||
RUN apt install -y software-properties-common gpg wget flex bison pkg-config | ||
RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null && \ | ||
apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' | ||
RUN apt -y update | ||
RUN apt install --reinstall ca-certificates | ||
RUN apt install kitware-archive-keyring | ||
RUN apt -y purge --auto-remove cmake && apt -y install cmake | ||
|
||
# install essential tools | ||
RUN apt -y update && \ | ||
apt -y install clang make cmake python3-pip git && \ | ||
apt -y remove gcc | ||
|
||
RUN apt install -y libopenblas-dev libomp-dev | ||
# Build and install FAISS v1.8.0 from source (GPU and Python bindings disabled).
# NOTE: the job count must be a plain command substitution; wrapping it in
# backticks makes the shell try to execute the CPU count as a command and
# leaves `make -j` without an argument (unbounded parallelism).
RUN wget https://github.com/facebookresearch/faiss/archive/refs/tags/v1.8.0.tar.gz && \
    tar xf v1.8.0.tar.gz && \
    cd faiss-1.8.0 && mkdir build && cd build && \
    cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=0 -DFAISS_OPT_LEVEL=generic -DFAISS_ENABLE_PYTHON=OFF -DFAISS_ENABLE_GPU=OFF .. && \
    make -j"$(nproc)" faiss && make install
|
||
# Conan | ||
RUN rm /usr/lib/python3.12/EXTERNALLY-MANAGED && \ | ||
pip3 install --force-reinstall --ignore-installed conan && \ | ||
conan profile detect && \ | ||
sed -i 's/compiler.cppstd=gnu17/compiler.cppstd=23/g' ~/.conan2/profiles/default | ||
|
||
|
||
WORKDIR /workdir | ||
|
||
|
||
# dev | ||
FROM base as dev | ||
COPY . . | ||
ENTRYPOINT [ "/usr/bin/bash", "-l", "-c" ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
.DEFAULT_GOAL := debug | ||
|
||
current_dir=$(CURDIR) | ||
|
||
conan-install-debug: | ||
conan install . --build=missing --settings=build_type=Debug | ||
|
||
conan-install-release: | ||
conan install . --build=missing --settings=build_type=Release | ||
|
||
conan-install-reldbg: | ||
conan install . --build=missing --settings=build_type=RelWithDebInfo | ||
|
||
configure-release: conan-install-release | ||
cmake -Hsrc -Bbuild/Release \ | ||
-DCMAKE_C_COMPILER=clang \ | ||
-DCMAKE_CXX_COMPILER=clang++ \ | ||
-DCMAKE_BUILD_TYPE=Release \ | ||
-DCMAKE_TOOLCHAIN_FILE=$(current_dir)/build/Release/generators/conan_toolchain.cmake | ||
|
||
configure-reldbg: conan-install-reldbg | ||
cmake -Hsrc -Bbuild/RelWithDebInfo \ | ||
-DCMAKE_C_COMPILER=clang \ | ||
-DCMAKE_CXX_COMPILER=clang++ \ | ||
-DCMAKE_BUILD_TYPE=RelWithDebInfo \ | ||
-DCMAKE_TOOLCHAIN_FILE=$(current_dir)/build/RelWithDebInfo/generators/conan_toolchain.cmake | ||
|
||
configure-debug: conan-install-debug | ||
cmake -Hsrc -Bbuild/Debug \ | ||
-DCMAKE_C_COMPILER=clang \ | ||
-DCMAKE_CXX_COMPILER=clang++ \ | ||
-DCMAKE_BUILD_TYPE=Debug \ | ||
-DCMAKE_TOOLCHAIN_FILE=$(current_dir)/build/Debug/generators/conan_toolchain.cmake | ||
|
||
# Build the Release tree with one job per available CPU.
# `$$(nproc)` escapes the dollar so the shell (not make) evaluates nproc;
# a bare `$(nproc)` is an undefined make variable and expands to nothing.
release: configure-release
	make -j$$(nproc) -C build/Release
|
||
# Build the RelWithDebInfo tree with one job per available CPU.
# `$$(nproc)` escapes the dollar so the shell (not make) evaluates nproc;
# a bare `$(nproc)` is an undefined make variable and expands to nothing.
reldbg: configure-reldbg
	make -j$$(nproc) -C build/RelWithDebInfo
|
||
# Build the Debug tree with one job per available CPU.
# `$$(nproc)` escapes the dollar so the shell (not make) evaluates nproc;
# a bare `$(nproc)` is an undefined make variable and expands to nothing.
debug: configure-debug
	make -j$$(nproc) -C build/Debug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Urukrama | ||
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) | ||
![example workflow](https://github.com/kisasexypantera94/urukrama/actions/workflows/c-cpp.yml/badge.svg) | ||
|
||
Urukrama is a simple and efficient implementation of the DiskANN algorithm, developed for fun and self-education. | ||
|
||
## Setup | ||
```bash | ||
git submodule update --init --recursive | ||
docker compose up -d | ||
``` | ||
Attach to the container and run: | ||
``` | ||
make release | ||
``` | ||
|
||
## References | ||
* **DiskANN: Fast Accurate Billion-point Nearest Neighbor Search on a Single Node.** | ||
Suhas Jayaram Subramanya, Rohan Kadekodi, Ravishankar Krishnaswamy, and Harsha Vardhan Simhadri, 2019. | ||
* **ParlayANN: Scalable and Deterministic Parallel Graph-Based Approximate Nearest Neighbor Search Algorithms**. | ||
Magdalen Dobson Manohar, Zheqi Shen, Guy E. Blelloch, Laxman Dhulipala, Yan Gu, Harsha Vardhan Simhadri, Yihan Sun, 2023. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
[requires] | ||
eigen/3.4.0 | ||
tsl-robin-map/1.2.2 | ||
tsl-hopscotch-map/2.3.1 | ||
tsl-sparse-map/0.6.2 | ||
boost/1.84.0 | ||
spdlog/1.13.0 | ||
onetbb/2021.12.0 | ||
mio/cci.20230303 | ||
|
||
[generators] | ||
CMakeDeps | ||
CMakeToolchain | ||
|
||
[layout] | ||
cmake_layout | ||
|
||
[options] | ||
boost*:header_only=True | ||
spdlog*:header_only=True | ||
spdlog*:use_std_fmt=True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
version: '3.4' | ||
services: | ||
dev: | ||
build: | ||
context: . | ||
target: dev | ||
network_mode: host | ||
stdin_open: true | ||
tty: true | ||
entrypoint: ["bash"] |
Submodule sanitizers-cmake
added at
3f0542
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
BasedOnStyle: Google | ||
IndentWidth: 4 | ||
AccessModifierOffset: -4 | ||
ColumnLimit: 120 | ||
|
||
AlignEscapedNewlines: Right | ||
AlignTrailingComments: false | ||
|
||
AllowAllArgumentsOnNextLine: false | ||
AllowShortFunctionsOnASingleLine: true | ||
|
||
AllowShortIfStatementsOnASingleLine: true | ||
AllowShortLoopsOnASingleLine: true | ||
|
||
BinPackArguments: false | ||
BinPackParameters: false | ||
|
||
BreakBeforeBraces: WebKit | ||
BreakBeforeTernaryOperators: true | ||
BreakConstructorInitializers: BeforeComma | ||
BreakInheritanceList: BeforeComma | ||
|
||
|
||
DerivePointerAlignment: false | ||
PointerAlignment: Left | ||
|
||
SortIncludes: true | ||
IncludeBlocks: Preserve | ||
|
||
SpaceBeforeInheritanceColon: false | ||
SpaceBeforeCtorInitializerColon: false | ||
SpaceBeforeRangeBasedForLoopColon: false | ||
SpacesInContainerLiterals: false | ||
|
||
Standard: Latest | ||
|
||
MaxEmptyLinesToKeep: 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
Checks: " | ||
bugprone-*, | ||
cppcoreguidelines-*, | ||
clang-analyzer-*, | ||
-bugprone-easily-swappable-parameters, | ||
-cppcoreguidelines-avoid-magic-numbers, | ||
" | ||
WarningsAsErrors: '' | ||
HeaderFilterRegex: '.' | ||
FormatStyle: none |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
cmake_minimum_required(VERSION 3.28.3) | ||
|
||
project(urukrama) | ||
|
||
set(CMAKE_CXX_STANDARD 23) | ||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | ||
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast") | ||
|
||
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/../externals/sanitizers-cmake/cmake" ${CMAKE_MODULE_PATH}) | ||
# set(CMAKE_CXX_CLANG_TIDY "clang-tidy") | ||
|
||
# Conan | ||
find_package(Eigen3 REQUIRED) | ||
find_package(tsl-robin-map REQUIRED) | ||
find_package(Boost REQUIRED) | ||
find_package(spdlog REQUIRED) | ||
find_package(TBB REQUIRED) | ||
|
||
# apt | ||
find_package(BLAS REQUIRED) | ||
find_package(OpenMP REQUIRED) | ||
|
||
# submodules | ||
find_package(Sanitizers REQUIRED) | ||
|
||
add_executable(urukrama | ||
main.cpp | ||
utils.cpp | ||
in_memory_graph.cpp | ||
on_disk_graph.cpp | ||
faiss.cpp | ||
) | ||
|
||
target_link_libraries(urukrama | ||
spdlog::spdlog_header_only | ||
Eigen3::Eigen | ||
tsl::robin_map | ||
boost::boost | ||
onetbb::onetbb | ||
faiss | ||
OpenMP::OpenMP_CXX | ||
${BLAS_LIBRARIES} | ||
) | ||
|
||
target_link_directories(urukrama | ||
PUBLIC | ||
${CMAKE_LIBRARY_PATH} | ||
) | ||
|
||
target_include_directories(urukrama | ||
PUBLIC | ||
${CMAKE_INCLUDE_PATH} | ||
) | ||
|
||
add_sanitizers(urukrama) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#pragma once | ||
|
||
#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>
|
||
// Sorted, capacity-bounded collection of (key, value) pairs.
//
// Pairs are kept in ascending order (std::pair ordering: key first, then
// value). At most `limit` pairs are retained; when an insertion would exceed
// the limit, the largest pair is dropped.
template <typename K, typename V>
class BoundedSortedVector {
public:
    // limit: maximum number of pairs the container will hold.
    BoundedSortedVector(size_t limit): m_limit(limit) {}

public:
    // Inserts (key, val) at its sorted position and evicts the largest pair
    // if the limit is exceeded.
    void emplace(const K& key, const V& val)
    {
        // Fast path: the container is full and the new key is strictly worse
        // than the current worst one, so it would be inserted last and popped
        // immediately. The empty() check covers a limit of zero, where there
        // is no back() to compare against.
        if (m_data.size() >= m_limit and (m_data.empty() or m_data.back().first < key)) {
            return;
        }

        auto element = std::make_pair(key, val);
        const auto position = std::lower_bound(m_data.begin(), m_data.end(), element);
        m_data.insert(position, std::move(element));

        if (m_data.size() > m_limit) {
            m_data.pop_back();
        }
    }

    auto begin() { return m_data.begin(); }
    auto end() { return m_data.end(); }

    // Read-only iteration for const instances.
    auto begin() const { return m_data.begin(); }
    auto end() const { return m_data.end(); }

    // Pre-allocates storage for `num` pairs; does not change the limit.
    void reserve(size_t num) { m_data.reserve(num); }

    // Sets the limit to `n` and drops the largest pairs beyond it.
    // Growing the limit never fabricates elements: padding with
    // default-constructed pairs would inject bogus entries and break the
    // sorted order.
    void resize(size_t n)
    {
        m_limit = n;

        if (m_data.size() > n) {
            m_data.erase(m_data.begin() + static_cast<std::ptrdiff_t>(n), m_data.end());
        }
    }

    // Hands the underlying sorted storage over to the caller (rvalue only).
    operator std::vector<std::pair<K, V>>() && { return std::move(m_data); }

private:
    size_t m_limit;
    std::vector<std::pair<K, V>> m_data;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#pragma once | ||
|
||
#include <concepts> | ||
#include <ranges> | ||
#include <span> | ||
|
||
namespace urukrama { | ||
|
||
template <typename D> | ||
concept CDistance = requires(std::span<const typename D::Type> a) { | ||
typename D::Type; | ||
|
||
{ D::Compute(a, a) } -> std::same_as<typename D::Type>; | ||
}; | ||
|
||
template <typename T> | ||
struct NaiveL2 { | ||
using Type = T; | ||
|
||
static T Compute(std::span<const T> a, std::span<const T> b) | ||
{ | ||
T distance = 0; | ||
|
||
for (const auto [x, y]: std::views::zip(a, b)) { | ||
T diff = x - y; | ||
distance += diff * diff; | ||
} | ||
|
||
return distance; | ||
} | ||
}; | ||
|
||
} // namespace urukrama |
Oops, something went wrong.