From 8d414e22504e9b350852136ddeee7c90f8044e47 Mon Sep 17 00:00:00 2001 From: Eric Allen Date: Thu, 8 Feb 2024 17:07:59 -0500 Subject: [PATCH] fix: better phone number handling --- jest.config.js | 5 ++ package-lock.json | 19 ++--- package.json | 2 +- .../detectors_tests/isPhoneNumber.test.ts | 81 +++++++++++++++---- src/prompt.ts | 68 +++++++++++++--- 5 files changed, 139 insertions(+), 36 deletions(-) diff --git a/jest.config.js b/jest.config.js index 74c4745..b5e9122 100644 --- a/jest.config.js +++ b/jest.config.js @@ -6,4 +6,9 @@ module.exports = { }, // this ignores everything from being transformed in node_modules except for the compromise module transformIgnorePatterns: ["node_modules/(?!compromise)/"], + globals: { + navigator: { + language: "de-CH", + }, + }, } diff --git a/package-lock.json b/package-lock.json index a9af8c9..bbce398 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,18 +1,18 @@ { "name": "lakera", - "version": "0.1.0", + "version": "0.1.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lakera", - "version": "0.1.0", + "version": "0.1.4", "hasInstallScript": true, "dependencies": { "addresser": "^1.1.20", "compromise": "^14.9.0", + "libphonenumber-js": "^1.10.55", "luhn": "^2.4.1", - "phone": "^3.1.37", "sweetalert2": "^11.7.20" }, "devDependencies": { @@ -8593,6 +8593,11 @@ "node": ">= 0.8.0" } }, + "node_modules/libphonenumber-js": { + "version": "1.10.55", + "resolved": "https://registry.npmjs.org/libphonenumber-js/-/libphonenumber-js-1.10.55.tgz", + "integrity": "sha512-MrTg2JFLscgmTY6/oT9vopYETlgUls/FU6OaeeamGwk4LFxjIgOUML/ZSZICgR0LPYXaonVJo40lzMvaaTJlQA==" + }, "node_modules/lilconfig": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-2.1.0.tgz", @@ -9204,14 +9209,6 @@ "node": ">=8" } }, - "node_modules/phone": { - "version": "3.1.39", - "resolved": "https://registry.npmjs.org/phone/-/phone-3.1.39.tgz", - "integrity": "sha512-+9ON+0aIh7Ax9C/3od2jZBA+przhtixdBYZxKME5DwFcpGVW3XQAQXLYIvFr8AMUI8Zr6o1Mj3kkY35B0AvQVw==", - "engines": { - "node": ">=12" - } - }, "node_modules/picocolors": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz", diff --git a/package.json b/package.json index ecb0b50..9433a63 100644 --- a/package.json +++ b/package.json @@ -17,8 +17,8 @@ "dependencies": { "addresser": "^1.1.20", "compromise": "^14.9.0", + "libphonenumber-js": "^1.10.55", "luhn": "^2.4.1", - "phone": "^3.1.37", "sweetalert2": "^11.7.20" }, "devDependencies": { diff --git a/src/__tests__/detectors_tests/isPhoneNumber.test.ts b/src/__tests__/detectors_tests/isPhoneNumber.test.ts index 829da8f..4afd958 100644 --- a/src/__tests__/detectors_tests/isPhoneNumber.test.ts +++ b/src/__tests__/detectors_tests/isPhoneNumber.test.ts @@ -1,5 +1,35 @@ import { isPhoneNumber } from '../../prompt' +// some extra test case numbers from Phone Number +// GitHub Issue: https://github.com/lakeraai/chrome-extension/issues/1 +const testNumbersUS = [ + '+1 (617) 867-5309', + '+1-617-867-5309', + '+1.617.867.5309', + '+1 617 867 5309', + '617-867-5309', + '617.867.5309', + '617 867 5309', + '(617) 867-5309', + '00(617) 867-5309' +] + +// some formats that are just kind of broken and hard to parse +const testNonNumbersUS = ['00617 867 5309', '00617-867-5309', '00617.867.5309'] + +// we'll default to the Swiss locale for these tests +const navigator = { language: 'de-CH' } + +Object.defineProperty(globalThis, 'navigator', { + value: navigator, + writable: true, + configurable: true +}) + +afterEach(() => { + navigator.language = 'de-CH' +}) + describe('Phone numbers', () => { describe('True positives', () => { test('phone number with 41 country code', () => { @@ -49,27 +79,34 @@ describe('Phone numbers', () => { test('phone number with +41 country code and in context', () => { expect(isPhoneNumber('I texted my friend at +41747587256').pii).toBe(true) }) - }) - describe('False positives', () => {}) + describe('US phone number with dashes instead of parentheses', () => { + test('user locale is CH', () => { + expect(isPhoneNumber('617-867-5309').pii).toBe(true) + }) - describe('True negatives', () => { - test('phone number with less than 11 digits', () => { - expect(isPhoneNumber('+4174758725').pii).toBe(false) + test('user locale is US', () => { + navigator.language = 'en-US' + expect(isPhoneNumber('617-867-5309').pii).toBe(true) + }) }) - test('phone number without country code', () => { - expect(isPhoneNumber('747587256').pii).toBe(false) - }) + describe('Extra test cases', () => { + test.each(testNumbersUS)('Test: %s', (num) => { + navigator.language = 'en-US' + expect(isPhoneNumber(num).pii).toBe(true) + }) - test('phone number with 41 country code and random characters', () => { - expect(isPhoneNumber('41 asdadsa-747-587-256').pii).toBe(false) + test.each(testNonNumbersUS)('Test: %s', (num) => { + navigator.language = 'en-US' + expect(isPhoneNumber(num).pii).toBe(false) + }) }) + }) - test('phone number with +41 country code and random characters', () => { - expect(isPhoneNumber('+41 asdadsa-747-587-256').pii).toBe(false) - }) + describe('False positives', () => {}) + describe('True negatives', () => { test('phone number with split digits', () => { expect( isPhoneNumber( @@ -79,5 +116,21 @@ describe('Phone numbers', () => { }) }) - describe('False negatives', () => {}) + describe('Edge cases', () => { + test('Swiss phone number with 41 country code and random characters', () => { + expect(isPhoneNumber('41 asdadsa-747-587-256').pii).toBe(true) + }) + + test('phone number with +41 country code and random characters', () => { + expect(isPhoneNumber('+41 asdadsa-747-587-256').pii).toBe(true) + }) + + test('phone number without country code', () => { + expect(isPhoneNumber('747587256').pii).toBe(true) + }) + + test('phone number with less than 11 digits', () => { + expect(isPhoneNumber('+4174758725').pii).toBe(false) + }) + }) }) diff --git a/src/prompt.ts b/src/prompt.ts index 7abb000..e849d64 100644 --- a/src/prompt.ts +++ b/src/prompt.ts @@ -1,7 +1,11 @@ import luhn from 'luhn' import nlp from 'compromise' -import phone from 'phone' import addresser from 'addresser' + +import { findPhoneNumbersInText } from 'libphonenumber-js' + +import type { CountryCode, NumberFound } from 'libphonenumber-js/types' + import { GROUP_CREDIT_CARD_DIGITS, CREDIT_CARD_REGEX_STR, @@ -12,6 +16,7 @@ import { SECRET_KEY_REGEX, SSN_REGEX } from './config' + import { type Pii, registerDetectors, @@ -71,16 +76,59 @@ export function isEmail (promptText: string): Pii { } export function isPhoneNumber (promptText: string): Pii { - const splitPromptText: string[] = promptText.split(/[^\d\s-()]{1,}/) + // because phone numbers are often used without a country code + // we need to do some extra work to validate potential phone numbers + // NOTE: This will still miss numbers that aren't valid in the user's + // browser locale or the US; we could try to geolocate the user, but then + // we'd still only be relying on their current location and not necessarily + // the country for which the phone number is valid + + // first let's get the user's locale from the browser and try to extract + // the country code from it + const userRegion: CountryCode = + (navigator?.language?.split('-')?.[ + navigator?.language?.split('-').length - 1 + ] as CountryCode) ?? + (navigator?.languages?.[0]?.split('-')?.[ + navigator?.language?.split('-').length - 1 + ] as CountryCode) ?? + // default to the US if we can't determine the user's region from the navigator language setting + 'US' + + // check the prompt for phone numbers using the country code of the user's locale region + const phoneNumbersWithUserRegion = findPhoneNumbersInText(promptText, { + defaultCountry: userRegion, + extended: true + }) + + let phoneNumbersWithoutUserRegion: NumberFound[] = [] + + if (userRegion !== 'US') { + // check the prompt for phone numbers using the US country code + phoneNumbersWithoutUserRegion = findPhoneNumbersInText(promptText, { + defaultCountry: 'US', + extended: true + }) + } - for (const text of splitPromptText) { - const sanitizedPromptText = text.replace(/\D/g, '') - const phoneNumber = '+'.concat(sanitizedPromptText) - if (phone(phoneNumber).isValid) { - return { - pii: true, - message: `
phone number: "${phoneNumber}"
` - } + // get a de-duplicated list of the found phone numbers + const foundPhoneNumbers: NumberFound[] = [ + ...phoneNumbersWithUserRegion, + ...phoneNumbersWithoutUserRegion + ].filter( + (item, index, array) => array.findIndex((i) => i.number === item.number) === index + ) + + // if we found any phone numbers, add them to the warning message + if (foundPhoneNumbers.length > 0) { + return { + pii: true, + message: `
${foundPhoneNumbers + .map( + (phoneNumber) => + `
phone number: ${phoneNumber.number.formatInternational()}
` + ) + .join('
')}` } }