Skip to content
This repository has been archived by the owner on Jul 27, 2022. It is now read-only.

Commit

Permalink
Added REGEX and reverse search for links
Browse files Browse the repository at this point in the history
  • Loading branch information
pielco11 committed Feb 3, 2019
1 parent 6d4a948 commit d9a9571
Showing 1 changed file with 55 additions and 3 deletions.
58 changes: 55 additions & 3 deletions twint/src/twint/transforms/getTweets.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import json, re

from elasticsearch import Elasticsearch
from canari.maltego.entities import Person, Twit, Hashtag, Location
from canari.maltego.entities import Person, Twit, Hashtag, Location, URL
from canari.maltego.transform import Transform

es = Elasticsearch()
Expand Down Expand Up @@ -186,7 +186,7 @@ def do_transform(self, request, response, config):
user = request.entity
_body = {
'query': {
'match' :
'match':
{
'user': user.value
}
Expand All @@ -200,3 +200,55 @@ def do_transform(self, request, response, config):
r.value = _user['follow']
response += r
return response

class getLinksFromUser(Transform):
    """Maltego transform that turns a Person into the URLs found in their tweets."""

    input_type = Person

    def do_transform(self, request, response, config):
        """Search the ``twinttweets`` index and emit one URL entity per link.

        The query requires the ``username`` field to match the input
        entity's value and the ``tweet`` field to satisfy an alphanumeric
        regexp clause; the result size is ``request.limits.hard``.

        Every link found in a returned tweet is appended to ``response``
        (duplicates are not filtered), and ``response`` is returned.
        """
        username_clause = {'match': {'username': request.entity.value}}
        text_clause = {'regexp': {'tweet': '[a-zA-Z0-9]{1,63}'}}
        search_body = {
            'query': {
                'bool': {
                    'must': [username_clause, text_clause]
                }
            },
            'size': request.limits.hard
        }
        found = es.search(index="twinttweets", body=search_body)

        # NOTE: the '.' between the host class and the path class is
        # unescaped, so it matches any single character rather than only a
        # literal dot. For a short host such as "t.co/abc" it is the '/'
        # that gets consumed there.
        find_links = re.compile(r'[\w]{3,8}://[\w.]{2,63}.[\w/]{1,63}').findall

        for doc in found['hits']['hits']:
            for link in find_links(doc['_source']['tweet']):
                entity = URL()
                entity.url = link
                # The pattern guarantees "://", so element 2 of the split is
                # whatever follows the scheme separator up to the next '/'.
                entity.title = link.split('/')[2]
                response += entity
        return response

class getTweetsFromLink(Transform):
    """Maltego transform that turns a URL into the tweets containing it."""

    input_type = URL

    def do_transform(self, request, response, config):
        """Search the ``twinttweets`` index for tweets containing the URL.

        Runs a ``match_phrase`` query on the ``tweet`` field using the input
        entity's value, with the result size set to ``request.limits.hard``.
        Each hit becomes a ``Twit`` entity appended to ``response``, which is
        then returned.
        """
        search_body = {
            'query': {
                'match_phrase': {'tweet': request.entity.value}
            },
            'size': request.limits.hard
        }
        found = es.search(index="twinttweets", body=search_body)

        for doc in found['hits']['hits']:
            source = doc['_source']
            entity = Twit()
            entity.id = int(source['id'])

            # Non-ASCII characters are dropped; the same encoded text feeds
            # the content, the name and (truncated to 30) the title.
            ascii_text = source['tweet'].encode('ascii', 'ignore')
            entity.content = ascii_text
            entity.name = ascii_text
            entity.title = ascii_text[:30]

            entity.pubdate = source['date']
            author = source['username']
            entity.author = author
            entity.author_uri = 'https://twitter.com/' + author
            response += entity
        return response

1 comment on commit d9a9571

@pielco11
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#1

Please sign in to comment.