Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: better phone number handling #4

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions jest.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,9 @@ module.exports = {
},
// this ignores everything from being transformed in node_modules except for the compromise module
transformIgnorePatterns: ["node_modules/(?!compromise)/"],
globals: {
navigator: {
language: "de-CH",
},
},
}
19 changes: 8 additions & 11 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
"dependencies": {
"addresser": "^1.1.20",
"compromise": "^14.9.0",
"libphonenumber-js": "^1.10.55",
"luhn": "^2.4.1",
"phone": "^3.1.37",
"sweetalert2": "^11.7.20"
},
"devDependencies": {
Expand Down
81 changes: 67 additions & 14 deletions src/__tests__/detectors_tests/isPhoneNumber.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,35 @@
import { isPhoneNumber } from '../../prompt'

// some extra test case numbers from Phone Number
// GitHub Issue: https://github.com/lakeraai/chrome-extension/issues/1
const testNumbersUS = [
'+1 (617) 867-5309',
'+1-617-867-5309',
'+1.617.867.5309',
'+1 617 867 5309',
'617-867-5309',
'617.867.5309',
'617 867 5309',
'(617) 867-5309',
'00(617) 867-5309'
]

// some formats that are just kind of broken and hard to parse
const testNonNumbersUS = ['00617 867 5309', '00617-867-5309', '00617.867.5309']

// we'll default to the Swiss locale for these tests
const navigator = { language: 'de-CH' }

Object.defineProperty(globalThis, 'navigator', {
value: navigator,
writable: true,
configurable: true
})

afterEach(() => {
navigator.language = 'de-CH'
})

describe('Phone numbers', () => {
describe('True positives', () => {
test('phone number with 41 country code', () => {
Expand Down Expand Up @@ -49,27 +79,34 @@ describe('Phone numbers', () => {
test('phone number with +41 country code and in context', () => {
expect(isPhoneNumber('I texted my friend at +41747587256').pii).toBe(true)
})
})

describe('False positives', () => {})
describe('US phone number with dashes instead of parentheses', () => {
test('user locale is CH', () => {
expect(isPhoneNumber('617-867-5309').pii).toBe(true)
})

describe('True negatives', () => {
test('phone number with less than 11 digits', () => {
expect(isPhoneNumber('+4174758725').pii).toBe(false)
test('user locale is US', () => {
navigator.language = 'en-US'
expect(isPhoneNumber('617-867-5309').pii).toBe(true)
})
})

test('phone number without country code', () => {
expect(isPhoneNumber('747587256').pii).toBe(false)
})
describe('Extra test cases', () => {
test.each(testNumbersUS)('Test: %s', (num) => {
navigator.language = 'en-US'
expect(isPhoneNumber(num).pii).toBe(true)
})

test('phone number with 41 country code and random characters', () => {
expect(isPhoneNumber('41 asdadsa-747-587-256').pii).toBe(false)
test.each(testNonNumbersUS)('Test: %s', (num) => {
navigator.language = 'en-US'
expect(isPhoneNumber(num).pii).toBe(false)
})
})
})

test('phone number with +41 country code and random characters', () => {
expect(isPhoneNumber('+41 asdadsa-747-587-256').pii).toBe(false)
})
describe('False positives', () => {})

describe('True negatives', () => {
test('phone number with split digits', () => {
expect(
isPhoneNumber(
Expand All @@ -79,5 +116,21 @@ describe('Phone numbers', () => {
})
})

describe('False negatives', () => {})
describe('Edge cases', () => {
test('Swiss phone number with 41 country code and random characters', () => {
expect(isPhoneNumber('41 asdadsa-747-587-256').pii).toBe(true)
})

test('phone number with +41 country code and random characters', () => {
expect(isPhoneNumber('+41 asdadsa-747-587-256').pii).toBe(true)
})

test('phone number without country code', () => {
expect(isPhoneNumber('747587256').pii).toBe(true)
})

test('phone number with less than 11 digits', () => {
expect(isPhoneNumber('+4174758725').pii).toBe(false)
})
})
})
68 changes: 58 additions & 10 deletions src/prompt.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import luhn from 'luhn'
import nlp from 'compromise'
import phone from 'phone'
import addresser from 'addresser'

import { findPhoneNumbersInText } from 'libphonenumber-js'

import type { CountryCode, NumberFound } from 'libphonenumber-js/types'

import {
GROUP_CREDIT_CARD_DIGITS,
CREDIT_CARD_REGEX_STR,
Expand All @@ -12,6 +16,7 @@ import {
SECRET_KEY_REGEX,
SSN_REGEX
} from './config'

import {
type Pii,
registerDetectors,
Expand Down Expand Up @@ -71,16 +76,59 @@ export function isEmail (promptText: string): Pii {
}

export function isPhoneNumber (promptText: string): Pii {
const splitPromptText: string[] = promptText.split(/[^\d\s-()]{1,}/)
// because phone numbers are often used without a country code
// we need to do some extra work to validate potential phone numbers
// NOTE: This will still miss numbers that aren't valid in the user's
// browser locale or the US; we could try to geolocate the user, but then
// we'd still only be relying on their current location and not necessarily
// the country for which the phone number is valid

// first let's get the user's locale from the browser and try to extract
// the country code from it
const userRegion: CountryCode =
(navigator?.language?.split('-')?.[
navigator?.language?.split('-').length - 1
] as CountryCode) ??
(navigator?.languages?.[0]?.split('-')?.[
navigator?.language?.split('-').length - 1
] as CountryCode) ??
// default to the US if we can't determine the user's region from the navigator language setting
'US'

// check the prompt for phone numbers using the country code of the user's locale region
const phoneNumbersWithUserRegion = findPhoneNumbersInText(promptText, {
defaultCountry: userRegion,
extended: true
})

let phoneNumbersWithoutUserRegion: NumberFound[] = []

if (userRegion !== 'US') {
// check the prompt for phone numbers using the US country code
phoneNumbersWithoutUserRegion = findPhoneNumbersInText(promptText, {
defaultCountry: 'US',
extended: true
})
}

for (const text of splitPromptText) {
const sanitizedPromptText = text.replace(/\D/g, '')
const phoneNumber = '+'.concat(sanitizedPromptText)
if (phone(phoneNumber).isValid) {
return {
pii: true,
message: `<br><div align="left">• <strong>phone number</strong>: "${phoneNumber}"</div>`
}
// get a de-duplicated list of the found phone numbers
const foundPhoneNumbers: NumberFound[] = [
...phoneNumbersWithUserRegion,
...phoneNumbersWithoutUserRegion
].filter(
(item, index, array) => array.findIndex((i) => i.number === item.number) === index
)

// if we found any phone numbers, add them to the warning message
if (foundPhoneNumbers.length > 0) {
return {
pii: true,
message: `<br />${foundPhoneNumbers
.map(
(phoneNumber) =>
`<div align="left">• <strong>phone number</strong>: ${phoneNumber.number.formatInternational()}</div>`
)
.join('<br />')}`
}
}

Expand Down