This commit is contained in:
François Pelletier 2017-01-02 22:50:48 -05:00
parent 14cc4a39fa
commit 93574518ff

295
facebook_app.py Normal file
View file

@ -0,0 +1,295 @@
# coding: utf-8
# Ce code sert à extraire des publications d'une page Facebook et les enregistrer dans ElasticSearch
#
# Resources:
#
# - Facebook SDK pour Python https://github.com/mobolic/facebook-sdk
# - Documentation Facebook du feed https://developers.facebook.com/docs/graph-api/reference/v2.8/page/feed
# - Documentation Facebook des commentaires https://developers.facebook.com/docs/graph-api/reference/v2.8/object/comments
# - Aller chercher un access token pour Facebook http://stackoverflow.com/a/26844734
# - Python datetime: https://docs.python.org/2/library/datetime.html
# - Python FB Pagination https://stackoverflow.com/questions/28589239/python-facebook-api-cursor-pagination
# In[1]:
# Standard library
import os
from datetime import datetime
from datetime import timedelta

# Third-party
import facebook
import requests
from elasticsearch import Elasticsearch
# In[2]:
app_id=os.environ['FB_APP_ID']
app_secret=os.environ['FB_APP_SECRET']
# In[3]:
def get_fb_token(app_id, app_secret):
    """Fetch an app access token from Facebook's OAuth endpoint.

    Parameters:
        app_id: Facebook application id.
        app_secret: Facebook application secret.

    Returns:
        The app access token as a string.

    Raises:
        requests.HTTPError: if Facebook answers with an error status
            (previously an error page was silently mis-parsed).
    """
    payload = {'grant_type': 'client_credentials',
               'client_id': app_id,
               'client_secret': app_secret}
    resp = requests.post('https://graph.facebook.com/oauth/access_token',
                         params=payload)
    # Fail loudly on an HTTP error instead of parsing the error body.
    resp.raise_for_status()
    # Older Graph API versions answered "access_token=<token>" as a query
    # string; v2.3+ answers JSON {"access_token": ...}. Handle both.
    try:
        return resp.json()['access_token']
    except ValueError:
        return resp.text.split("=")[1]
# In[4]:
access_token=get_fb_token(app_id,app_secret)
# In[5]:
week_delta = timedelta(days=7)
# In[6]:
es = Elasticsearch()
# In[11]:
es.indices.create(index='fb_page_post', ignore=400)
# In[10]:
#es.indices.delete(index='fb_page_post')
# In[18]:
graph = facebook.GraphAPI(access_token=access_token, version='2.7')
# In[19]:
page_obj = graph.get_object('PartiConservateurDuQuebec')
# In[12]:
def index_post(post):
    """Store one page-post document in the ``fb_page_post`` index."""
    doc_args = {
        'index': "fb_page_post",
        'doc_type': "fb_page_post_data",
        'id': post[u'id'],
        'timestamp': post[u'created_time'],
        'body': post,
    }
    es.index(**doc_args)
def index_comments(post):
    """Store one comment document in the ``fb_page_post`` index."""
    doc_args = {
        'index': "fb_page_post",
        'doc_type': "fb_page_post_comment",
        'id': post[u'id'],
        'timestamp': post[u'created_time'],
        'body': post,
    }
    es.index(**doc_args)
def index_like(post):
    """Store one like document in the ``fb_page_post`` index.

    Expects the synthetic ``like_id`` / ``like_time`` keys added by the
    caller (see the main loop).
    """
    doc_args = {
        'index': "fb_page_post",
        'doc_type': "fb_page_post_like",
        'id': post[u'like_id'],
        'timestamp': post[u'like_time'],
        'body': post,
    }
    es.index(**doc_args)
# In[13]:
def getfbpostsfrompage(fb_graph,page_id,field_list,time_since,time_until):
    """Collect every post of *page_id* published between *time_since*
    and *time_until* (ISO date strings), following Graph API cursor
    pagination until no "next" link remains.

    Returns a list of post dicts with the requested *field_list* fields.
    """
    endpoint = ('/' + page_id +
                '/posts?fields=' + ','.join(field_list) +
                '&since=' + time_since +
                '&until=' + time_until)
    res = fb_graph.get_object(endpoint)
    all_posts = []
    while True:
        batch = res.get(u'data')
        if batch is None:
            break
        all_posts.extend(batch)
        next_page = res.get(u'paging', {}).get(u'next')
        if next_page is None:
            break
        res = requests.get(next_page).json()
    return all_posts
# In[ ]:
#TEST
ttt = getfbpostsfrompage(graph,
page_obj[u'id'],
['id','created_time'],
(datetime.now().date()-week_delta).isoformat(),
datetime.now().date().isoformat())
# In[ ]:
#TEST
ttt
# In[14]:
def getpostmetacomplet(fb_graph,post_id,field_list):
    """Fetch a single Graph API object by id with the requested fields.

    Returns the object as a dict containing the *field_list* fields.
    """
    fields = ','.join(field_list)
    return fb_graph.get_object('/' + post_id + '?fields=' + fields)
# In[ ]:
#TEST
ppp_complet = getpostmetacomplet(graph,ttt[0][u'id'],['message','created_time','id','status_type','shares','link','via'])
# In[ ]:
#TEST
ppp_complet
# In[15]:
def getpostreact(fb_graph,post_id,field_list,react_type,filter_type):
    """Collect every reaction (*react_type* is 'comments' or 'likes')
    attached to *post_id*, following cursor pagination until exhausted.

    Returns a list of reaction dicts with the requested *field_list* fields.
    """
    endpoint = ('/' + post_id +
                '/' + react_type +
                '/?fields=' + ','.join(field_list) +
                '&filter=' + filter_type)
    res = fb_graph.get_object(endpoint)
    collected = []
    while True:
        batch = res.get(u'data')
        if batch is None:
            break
        collected.extend(batch)
        next_page = res.get(u'paging', {}).get(u'next')
        if next_page is None:
            break
        res = requests.get(next_page).json()
    return collected
# In[16]:
def dict_update(l,x):
    """Merge mapping *x* into *l* **in place** and return *l*.

    Returning the mutated dict lets callers use this inside a list
    comprehension; note the entries of the source list are modified too.
    """
    for key in x:
        l[key] = x[key]
    return l
# In[ ]:
#TEST
ccc = getpostreact(graph,ttt[0][u'id'],['id'],'comments','stream')
# In[ ]:
#TEST
[x[u'id'] for x in ccc[1:10]]
# In[ ]:
#TEST
[getpostmetacomplet(graph,x[u'id'],['id','from','message','created_time','comment_count','like_count','parent']) for x in ccc[1:10]]
# In[ ]:
#TEST
rrr = getpostreact(graph,ttt[0][u'id'],['id','name'],'likes','stream')
# In[ ]:
#TEST
rrr[0]['id']
# In[ ]:
#TEST
ttt[0][u'id']+'_'+rrr[0]['id']
# In[ ]:
#TEST
rrr[0]['id']
# In[ ]:
#TEST
like_dicts = [dict_update(x,{'like_time':ttt[0][u'created_time'],
'like_id':ttt[0][u'id']+'_'+x['id']}) for x in rrr]
# In[ ]:
#TEST
like_dicts[1:5]
# In[ ]:
#TEST
ppp_complet.update({u'likes':rrr})
# In[ ]:
#TEST
ppp_complet
# In[22]:
res = getfbpostsfrompage(graph,
page_obj[u'id'],
['id','created_time'],
(datetime.now().date()-week_delta).isoformat(),
datetime.now().date().isoformat())
# In[23]:
for pp in res:
# Post
post_complet = getpostmetacomplet(graph,
pp[u'id'],
['message','created_time','id',
'status_type','shares','link',
'via'])
# Like
all_post_likes = getpostreact(graph,pp[u'id'],
['id','name'],
'likes',
'stream')
like_count = len(all_post_likes)
post_complet.update({u'like_count':like_count})
# Sauvegarde des "post"
index_post(post_complet)
# Sauvegarde des "like"
like_dicts = [dict_update(x,{u'like_time':pp['created_time'],
u'like_id':pp[u'id']+'_'+x['id']}) for x in all_post_likes]
for l in like_dicts:
index_like(l)
# Comments
res_comments = getpostreact(graph,pp[u'id'],['id'],'comments','stream')
for cc in res_comments:
comment_complet = getpostmetacomplet(graph,
cc[u'id'],
['id','from','message',
'created_time','comment_count','like_count',
'parent'])
# Sauvegarde des "comments"
index_comments(comment_complet)
# In[ ]: