# coding: utf-8
"""Extract posts, likes and comments from a Facebook page and index them
into Elasticsearch.

Resources:
  - Facebook SDK for Python: https://github.com/mobolic/facebook-sdk
  - Facebook feed docs: https://developers.facebook.com/docs/graph-api/reference/v2.8/page/feed
  - Facebook comments docs: https://developers.facebook.com/docs/graph-api/reference/v2.8/object/comments
  - Getting a Facebook access token: http://stackoverflow.com/a/26844734
  - Python datetime: https://docs.python.org/2/library/datetime.html
  - Python FB pagination: https://stackoverflow.com/questions/28589239/python-facebook-api-cursor-pagination
"""

import os
from datetime import datetime
from datetime import timedelta

import requests
import facebook
from elasticsearch import Elasticsearch

# Facebook app credentials are read from the environment (fails fast with
# KeyError if they are missing).
app_id = os.environ['FB_APP_ID']
app_secret = os.environ['FB_APP_SECRET']


def get_fb_token(app_id, app_secret):
    """Return an app access token for the given Facebook app credentials.

    Graph API v2.3+ answers with JSON ({"access_token": "...", ...});
    older versions answered with the plain-text form "access_token=...".
    Both formats are handled.
    """
    payload = {'grant_type': 'client_credentials',
               'client_id': app_id,
               'client_secret': app_secret}
    response = requests.post('https://graph.facebook.com/oauth/access_token?',
                             params=payload)
    try:
        return response.json()['access_token']
    except ValueError:
        # Not JSON: legacy "access_token=<token>" plain-text response.
        return response.text.split("=")[1]


access_token = get_fb_token(app_id, app_secret)

# Posts are harvested over a one-week window ending today.
week_delta = timedelta(days=7)

es = Elasticsearch()

# Create the index if needed; ignore 400 ("index already exists").
es.indices.create(index='fb_page_post', ignore=400)

# To reset the index, run: es.indices.delete(index='fb_page_post')

graph = facebook.GraphAPI(access_token=access_token, version='2.7')

page_obj = graph.get_object('PartiConservateurDuQuebec')


def index_post(post):
    """Index one page post document into Elasticsearch."""
    es.index(index="fb_page_post",
             doc_type="fb_page_post_data",
             id=post[u'id'],
             timestamp=post[u'created_time'],
             body=post)


def index_comments(post):
    """Index one comment document into Elasticsearch."""
    es.index(index="fb_page_post",
             doc_type="fb_page_post_comment",
             id=post[u'id'],
             timestamp=post[u'created_time'],
             body=post)


def index_like(post):
    """Index one like document (synthetic 'like_id'/'like_time' keys) into
    Elasticsearch."""
    es.index(index="fb_page_post",
             doc_type="fb_page_post_like",
             id=post[u'like_id'],
             timestamp=post[u'like_time'],
             body=post)


def _collect_paginated(first_page):
    """Follow Graph API cursor pagination from *first_page*, returning every
    item of every page's 'data' list.

    Stops when a page has no 'paging'/'next' link (or no 'data' at all),
    which the Graph API uses to signal the last page.
    """
    items = []
    res = first_page
    while True:
        try:
            for item in res[u'data']:
                items.append(item)
            res = requests.get(res[u'paging'][u'next']).json()
        except KeyError:
            break
    return items


def getfbpostsfrompage(fb_graph, page_id, field_list, time_since, time_until):
    """Return all posts of *page_id* between *time_since* and *time_until*
    (ISO date strings), with the requested *field_list* fields."""
    first_page = fb_graph.get_object('/' + page_id +
                                     '/posts?fields=' + ','.join(field_list) +
                                     '&since=' + time_since +
                                     '&until=' + time_until)
    return _collect_paginated(first_page)


def getpostmetacomplet(fb_graph, post_id, field_list):
    """Return the full metadata of one post (fields in *field_list*)."""
    return fb_graph.get_object('/' + post_id +
                               '?fields=' + ','.join(field_list))


def getpostreact(fb_graph, post_id, field_list, react_type, filter_type):
    """Return all reactions of kind *react_type* ('comments' or 'likes') on
    *post_id*, following pagination, with the requested fields."""
    first_page = fb_graph.get_object('/' + post_id +
                                     '/' + react_type +
                                     '/?fields=' + ','.join(field_list) +
                                     '&filter=' + filter_type)
    return _collect_paginated(first_page)


def dict_update(l, x):
    """Update dict *l* with dict *x* in place and return *l* (convenience
    wrapper so the update can be used inside a comprehension)."""
    l.update(x)
    return l


# Fetch the past week's posts for the page.
res = getfbpostsfrompage(graph,
                         page_obj[u'id'],
                         ['id', 'created_time'],
                         (datetime.now().date() - week_delta).isoformat(),
                         datetime.now().date().isoformat())

for pp in res:
    # Post
    post_complet = getpostmetacomplet(graph, pp[u'id'],
                                      ['message', 'created_time', 'id',
                                       'status_type', 'shares', 'link',
                                       'via'])
    # Likes
    all_post_likes = getpostreact(graph, pp[u'id'],
                                  ['id', 'name'],
                                  'likes',
                                  'stream')
    like_count = len(all_post_likes)
    post_complet.update({u'like_count': like_count})
    # Save the post
    index_post(post_complet)
    # Save the likes: each like document gets a synthetic id
    # "<post_id>_<liker_id>" and inherits the post's created_time.
    like_dicts = [dict_update(x, {u'like_time': pp['created_time'],
                                  u'like_id': pp[u'id'] + '_' + x['id']})
                  for x in all_post_likes]
    for l in like_dicts:
        index_like(l)
    # Comments
    res_comments = getpostreact(graph, pp[u'id'], ['id'], 'comments', 'stream')
    for cc in res_comments:
        comment_complet = getpostmetacomplet(graph, cc[u'id'],
                                             ['id', 'from', 'message',
                                              'created_time', 'comment_count',
                                              'like_count', 'parent'])
        # Save the comment
        index_comments(comment_complet)