Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace gpt3-tokenizer with js-tiktoken for Enhanced Tokenization Performance #112

Merged
merged 5 commits into from
Sep 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 62 additions & 43 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@
"@vercel/analytics": "^1.2.2",
"common-tags": "^1.8.2",
"firebase": "^10.11.0",
"gpt3-tokenizer": "^1.1.5",
"highlight.js": "^11.9.0",
"highlightjs-copy": "^1.0.5",
"javascript-time-ago": "^2.5.10",
"js-tiktoken": "^1.0.14",
"openai": "^4.38.2",
"random-word-slugs": "^0.1.7",
"sse.js": "^2.4.1",
Expand Down
61 changes: 23 additions & 38 deletions src/misc/openai.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import type { Chat, ChatCost, ChatMessage } from './shared';
import GPT3Tokenizer from 'gpt3-tokenizer';
import { encodingForModel } from 'js-tiktoken';
import { ChatStorekeeper } from './chatStorekeeper';

// Initialization is slow, so only do it once.
// TypeScript misinterprets the export default class GPT3Tokenizer from gpt3-tokenizer
// and throws "TypeError: GPT3Tokenizer is not a constructor" if we try to call the ctor here.
// Therefore, we initialize the tokenizer in the first call to countTokens().
let tokenizer: GPT3Tokenizer;
const tokenizer = encodingForModel('gpt-4-turbo');

export enum AiProvider {
OpenAi = 'OpenAI',
Expand Down Expand Up @@ -52,7 +49,7 @@ export interface AiModelStats {
hidden?: boolean;
}

export const models: { [key in AiModel]: AiModelStats } = {
export const models: Record<AiModel, AiModelStats> = {
[AiModel.Gpt35Turbo]: {
provider: AiProvider.OpenAi,
maxTokens: 4096,
Expand Down Expand Up @@ -137,32 +134,22 @@ export const models: { [key in AiModel]: AiModelStats } = {
};

export const providers: AiProvider[] = [AiProvider.OpenAi, AiProvider.Mistral, AiProvider.Meta];

/**
* see https://platform.openai.com/docs/guides/chat/introduction > Deep Dive Expander
* see https://github.com/syonfox/GPT-3-Encoder/issues/2
*/
export function countTokens(message: ChatMessage): number {
// see comment above
if (!tokenizer) {
tokenizer = new GPT3Tokenizer({ type: 'gpt3' });
}

let num_tokens = 4; // every message follows <im_start>{role/name}\n{content}<im_end>\n
for (const [key, value] of Object.entries(message)) {
if (key !== 'name' && key !== 'role' && key !== 'content') {
continue;
}
const encoded: { bpe: number[]; text: string[] } = tokenizer.encode(value);
num_tokens += encoded.text.length;
if (key === 'name') {
num_tokens--; // if there's a name, the role is omitted
if (key === 'name' || key === 'role' || key === 'content') {
const tokensCount = tokenizer.encode(value).length;
num_tokens += (key === 'name') ? tokensCount - 1 : tokensCount;
}
}

return num_tokens;
}

export function modelExists(modelName: OpenAiModel): boolean {
export function modelExists(modelName: AiModel): boolean {
return modelName in models;
}

Expand All @@ -173,42 +160,40 @@ export function estimateChatCost(chat: Chat): ChatCost {
const messages = ChatStorekeeper.getCurrentMessageBranch(chat);

for (const message of messages) {
const tokens = countTokens(message);
if (message.role === 'assistant') {
tokensCompletion += countTokens(message);
tokensCompletion += tokens;
} else {
tokensPrompt += countTokens(message);
tokensPrompt += tokens;
}
}

// see https://platform.openai.com/docs/guides/chat/introduction > Deep Dive Expander
const tokensTotal = tokensPrompt + tokensCompletion + 2; // every reply is primed with <im_start>assistant
const { contextWindow, costInput, costOutput } = models[chat.settings.model];
const costPrompt = (costInput / 1000000.0) * tokensPrompt;
const costCompletion = (costOutput / 1000000.0) * tokensCompletion;
const costPrompt = (costInput / 1_000_000) * tokensPrompt;
const costCompletion = (costOutput / 1_000_000) * tokensCompletion;

return {
tokensPrompt,
tokensCompletion,
tokensTotal: tokensTotal,
tokensTotal,
costPrompt,
costCompletion,
costTotal: costPrompt + costCompletion,
maxTokensForModel: contextWindow
};
}

export function getProviderForModel(model: AiModel) {
const result = AiProvider.OpenAi;
if (model) {
if (model.includes('llama')) {
return AiProvider.Meta;
} else if (model.includes('mistral')) {
return AiProvider.Mistral;
}
export function getProviderForModel(model: AiModel): AiProvider {
if (model.includes('llama')) {
return AiProvider.Meta;
} else if (model.includes('mistral')) {
return AiProvider.Mistral;
}
return result;
return AiProvider.OpenAi;
}

export function getDefaultModelForProvider(provider: AiProvider) {
return Object.keys(models).find((key) => models[key as AiModel].provider === provider) as AiModel;
}
export function getDefaultModelForProvider(provider: AiProvider): AiModel {
return (Object.keys(models) as AiModel[]).find(key => models[key].provider === provider)!;
}