-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathimportFromDirectory.mjs
65 lines (61 loc) · 1.77 KB
/
importFromDirectory.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import fs from "fs";
import OpenAI from "openai";
import { insert, count } from "@orama/orama";
import { Command } from "commander";
import db from "./db.mjs";
// @ts-ignore
import { persistToFile } from "@orama/plugin-data-persistence/server";
import getEmbedding from "./embedding.mjs";
// OpenAI client built with the library's default configuration. Nothing else
// in this script references it directly; embeddings are obtained through
// getEmbedding().
const openai = new OpenAI();

// Command-line interface: one optional flag, `-d, --directory <directory>`,
// naming the directory whose files should be imported.
const program = new Command();
program.option("-d, --directory <directory>").parse();

// `directory` is undefined when the flag is not supplied.
const options = program.opts();
const directory = options.directory;
/**
 * Index one segment of text: trim it, pull out its `[[wiki-style]]` tags,
 * compute an embedding, and insert the resulting entry into the database.
 *
 * Segments that are empty after trimming are skipped. Failures while
 * embedding or inserting are logged with console.error and not rethrown.
 *
 * @param {string} segment a segment of text
 * @param {string} fileName the file this segment of text is from
 * @returns {Promise<void>}
 **/
const processSegment = async (segment, fileName) => {
  const content = segment.trim();
  if (content.length === 0) {
    return;
  }

  console.log("processing segment:", content);

  // Each match is `[[name]]`; the capture group is the name without brackets.
  const tags = Array.from(content.matchAll(/\[\[(.*?)\]\]/g), (found) => found[1]);

  try {
    const embedding = await getEmbedding(content);
    await insert(db, { parent: fileName, tags, embedding, content });
  } catch (err) {
    console.error(err);
  }
};
// Remember the starting directory so the database file can be written
// relative to it afterwards. Stepping back with "../" would only be correct
// for a single-level relative --directory, not for nested or absolute paths.
const originalCwd = process.cwd();
if (directory) process.chdir(directory);

// Keep only regular files: readFileSync throws EISDIR on sub-directories.
// statSync follows symlinks, so symlinked files are still imported.
const files = fs.readdirSync("./").filter((name) => fs.statSync(name).isFile());

// Every line of every file becomes one segment; all segments are processed
// concurrently and awaited together below.
const promises = [];
for (const file of files) {
  const fileText = fs.readFileSync(file, "utf8");
  for (const segment of fileText.split("\n")) {
    promises.push(processSegment(segment, file));
  }
}

try {
  await Promise.all(promises);
} finally {
  // need to switch back or we'll save db in wrong place
  if (directory) process.chdir(originalCwd);
  await persistToFile(db, "binary", "./.dbfile.msp");
  const dbCount = await count(db);
  console.log(`db has ${dbCount} entries`);
  console.log("done!");
  process.exit(0);
}