Skip to content

Commit

Permalink
fix hits selector, support custom user agent
Browse files Browse the repository at this point in the history
  • Loading branch information
xyb committed Jul 17, 2020
1 parent 6a6c701 commit 22d903f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@ __pycache__
.DS_Store
googlesearch
googledistance
*.html
*.css
test_googlesearch
test_googledistance
15 changes: 13 additions & 2 deletions src/googlesearch.nim
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
## Nim library for scraping google search results.

import httpclient
import math
import os
Expand Down Expand Up @@ -38,8 +40,12 @@ proc newProxyHttpClient(): HttpClient =
else:
result = newHttpClient()

var userAgent = getEnv("USER_AGENT", "")
if userAgent.len == 0:
userAgent = USER_AGENT

result.headers = newHttpHeaders({
- "User-Agent": USER_AGENT,
+ "User-Agent": userAgent,
"Accept-Language": "en-US,en;q=0.5",
})

Expand Down Expand Up @@ -84,9 +90,12 @@ iterator search*(query: string, maxResults = 10): SearchResult =

proc hits*(query: string): int =
## Search the given query string using Google and return the number of hits.
runnableExamples:
doAssert hits("nim-lang") > 0

let html = queryHtml(query)
let xml = parseHtml(newStringStream(html))
- let results = xml.querySelectorAll("div#resultStats")
+ let results = xml.querySelectorAll("div#result-stats")
for stats in results:
for match in stats.innerText().findAll(re"[\d,.]+"):
return parseInt(match.replace(re","))
Expand All @@ -99,6 +108,8 @@ proc distance*(term1, term2: string): float =
##
## More details:
## https://en.wikipedia.org/wiki/Normalized_Google_distance
runnableExamples:
doAssert distance("nim-lang", "pascal") > 0.0

let
term1Hits = log10(float(hits(term1)))
Expand Down

0 comments on commit 22d903f

Please sign in to comment.