From 93574518ff358a41ca2396a0ec9ef6d043b92eba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Pelletier?=
Date: Mon, 2 Jan 2017 22:50:48 -0500
Subject: [PATCH] script

---
 facebook_app.py | 296 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 296 insertions(+)
 create mode 100644 facebook_app.py

diff --git a/facebook_app.py b/facebook_app.py
new file mode 100644
index 0000000..7ef1e40
--- /dev/null
+++ b/facebook_app.py
@@ -0,0 +1,296 @@
+
+# coding: utf-8
+
+# This code extracts posts from a Facebook page and stores them in Elasticsearch
+#
+# Resources:
+#
+# - Facebook SDK for Python https://github.com/mobolic/facebook-sdk
+# - Facebook feed documentation https://developers.facebook.com/docs/graph-api/reference/v2.8/page/feed
+# - Facebook comments documentation https://developers.facebook.com/docs/graph-api/reference/v2.8/object/comments
+# - Getting a Facebook access token http://stackoverflow.com/a/26844734
+# - Python datetime: https://docs.python.org/2/library/datetime.html
+# - Python FB Pagination https://stackoverflow.com/questions/28589239/python-facebook-api-cursor-pagination
+
+# In[1]:
+
+import os
+import requests
+import facebook
+from elasticsearch import Elasticsearch
+from datetime import datetime
+from datetime import timedelta
+
+
+# In[2]:
+
+app_id=os.environ['FB_APP_ID']
+app_secret=os.environ['FB_APP_SECRET']
+
+
+# In[3]:
+
+def get_fb_token(app_id, app_secret):
+    payload = {'grant_type': 'client_credentials', 'client_id': app_id, 'client_secret': app_secret}
+    response = requests.post('https://graph.facebook.com/oauth/access_token?', params = payload)
+    #print response.text #to test what the FB api responded with
+    result = response.text.split("=")[1]
+    #print result #to test the TOKEN
+    return result
+
+
+# In[4]:
+
+access_token=get_fb_token(app_id,app_secret)
+
+
+# In[5]:
+
+week_delta = timedelta(days=7)
+
+
+# In[6]:
+
+es = Elasticsearch()
+
+
+# In[11]:
+
+es.indices.create(index='fb_page_post', ignore=400)
+
+
+# In[10]:
+
+#es.indices.delete(index='fb_page_post')
+
+
+# In[18]:
+
+graph = facebook.GraphAPI(access_token=access_token, version='2.7')
+
+
+# In[19]:
+
+page_obj = graph.get_object('PartiConservateurDuQuebec')
+
+
+# In[12]:
+
+def index_post(post):
+    es.index(
+        index="fb_page_post",
+        doc_type="fb_page_post_data",
+        id=post[u'id'],
+        timestamp=post[u'created_time'],
+        body=post)
+def index_comments(post):
+    es.index(
+        index="fb_page_post",
+        doc_type="fb_page_post_comment",
+        id=post[u'id'],
+        timestamp=post[u'created_time'],
+        body=post)
+def index_like(post):
+    es.index(
+        index="fb_page_post",
+        doc_type="fb_page_post_like",
+        id=post[u'like_id'],
+        timestamp=post[u'like_time'],
+        body=post)
+
+
+# In[13]:
+
+def getfbpostsfrompage(fb_graph,page_id,field_list,time_since,time_until):
+    all_posts = []
+    res = fb_graph.get_object('/'+
+                              page_id+
+                              '/posts?fields='+','.join(field_list)+
+                              '&since='+time_since+
+                              '&until='+time_until)
+    while True:
+        try:
+            for page in res[u'data']:
+                all_posts.append(page)
+            res=requests.get(res['paging']['next']).json()
+        except KeyError:
+            break
+    return all_posts
+
+
+# In[ ]:
+
+#TEST
+ttt = getfbpostsfrompage(graph,
+                         page_obj[u'id'],
+                         ['id','created_time'],
+                         (datetime.now().date()-week_delta).isoformat(),
+                         datetime.now().date().isoformat())
+
+
+# In[ ]:
+
+#TEST
+ttt
+
+
+# In[14]:
+
+def getpostmetacomplet(fb_graph,post_id,field_list):
+    post_meta_complet = fb_graph.get_object('/'+
+                                            post_id+
+                                            '?fields='+','.join(field_list))
+    return post_meta_complet
+
+
+# In[ ]:
+
+#TEST
+ppp_complet = getpostmetacomplet(graph,ttt[0][u'id'],['message','created_time','id','status_type','shares','link','via'])
+
+
+# In[ ]:
+
+#TEST
+ppp_complet
+
+
+# In[15]:
+
+def getpostreact(fb_graph,post_id,field_list,react_type,filter_type):
+    res = fb_graph.get_object('/'+post_id+
+                              '/'+react_type+'/?fields='+','.join(field_list)+
+                              '&filter='+filter_type)
+    all_comments = []
+    while True:
+        try:
+            for comment in res[u'data']:
+                all_comments.append(comment)
+            res=requests.get(res[u'paging'][u'next']).json()
+        except KeyError:
+            break
+    return all_comments
+
+
+# In[16]:
+
+def dict_update(l,x):
+    l.update(x)
+    return l
+
+
+# In[ ]:
+
+#TEST
+ccc = getpostreact(graph,ttt[0][u'id'],['id'],'comments','stream')
+
+
+# In[ ]:
+
+#TEST
+[x[u'id'] for x in ccc[1:10]]
+
+
+# In[ ]:
+
+#TEST
+[getpostmetacomplet(graph,x[u'id'],['id','from','message','created_time','comment_count','like_count','parent']) for x in ccc[1:10]]
+
+
+# In[ ]:
+
+#TEST
+rrr = getpostreact(graph,ttt[0][u'id'],['id','name'],'likes','stream')
+
+
+# In[ ]:
+
+#TEST
+rrr[0]['id']
+
+
+# In[ ]:
+
+#TEST
+ttt[0][u'id']+'_'+rrr[0]['id']
+
+
+# In[ ]:
+
+#TEST
+rrr[0]['id']
+
+
+# In[ ]:
+
+#TEST
+like_dicts = [dict_update(x,{'like_time':ttt[0][u'created_time'],
+                             'like_id':ttt[0][u'id']+'_'+x['id']}) for x in rrr]
+
+
+# In[ ]:
+
+#TEST
+like_dicts[1:5]
+
+
+# In[ ]:
+
+#TEST
+ppp_complet.update({u'likes':rrr})
+
+
+# In[ ]:
+
+#TEST
+ppp_complet
+
+
+# In[22]:
+
+res = getfbpostsfrompage(graph,
+                         page_obj[u'id'],
+                         ['id','created_time'],
+                         (datetime.now().date()-week_delta).isoformat(),
+                         datetime.now().date().isoformat())
+
+
+# In[23]:
+
+for pp in res:
+    # Post
+    post_complet = getpostmetacomplet(graph,
+                                      pp[u'id'],
+                                      ['message','created_time','id',
+                                       'status_type','shares','link',
+                                       'via'])
+    # Like
+    all_post_likes = getpostreact(graph,pp[u'id'],
+                                  ['id','name'],
+                                  'likes',
+                                  'stream')
+    like_count = len(all_post_likes)
+    post_complet.update({u'like_count':like_count})
+    # Save the post
+    index_post(post_complet)
+    # Save the likes
+    like_dicts = [dict_update(x,{u'like_time':pp['created_time'],
+                                 u'like_id':pp[u'id']+'_'+x['id']}) for x in all_post_likes]
+    for l in like_dicts:
+        index_like(l)
+    # Comments
+    res_comments = getpostreact(graph,pp[u'id'],['id'],'comments','stream')
+    for cc in res_comments:
+        comment_complet = getpostmetacomplet(graph,
+                                             cc[u'id'],
+                                             ['id','from','message',
+                                              'created_time','comment_count','like_count',
+                                              'parent'])
+        # Save the comments
+        index_comments(comment_complet)
+
+
+# In[ ]:
+
+
+
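A minimal sanity-check sketch, assuming the script above has already run against a local Elasticsearch node: it reads back the documents that facebook_app.py indexed. The index and doc_type names come from the script; the queries themselves (a count per type and a match_all fetch of five posts) are illustrative assumptions rather than anything the patch itself defines.

from elasticsearch import Elasticsearch

es = Elasticsearch()

# Count the documents of each type written by facebook_app.py
# (index and doc_type names match the ones used in the script above).
for doc_type in ('fb_page_post_data', 'fb_page_post_comment', 'fb_page_post_like'):
    count = es.count(index='fb_page_post', doc_type=doc_type)['count']
    print('%s: %d' % (doc_type, count))

# Fetch a handful of indexed page posts and print their creation times;
# the match_all query and the size of 5 are arbitrary choices for inspection.
res = es.search(index='fb_page_post', doc_type='fb_page_post_data',
                body={'query': {'match_all': {}}, 'size': 5})
for hit in res['hits']['hits']:
    print('%s %s' % (hit['_id'], hit['_source'].get('created_time')))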