# coding: utf-8
|
|
|
|
# Ce code sert à extraire des publications d'une page Facebook et les enregistrer dans ElasticSearch
|
|
#
|
|
# Resources:
|
|
#
|
|
# - Facebook SDK pour Python https://github.com/mobolic/facebook-sdk
|
|
# - Documentation Facebook du feed https://developers.facebook.com/docs/graph-api/reference/v2.8/page/feed
|
|
# - Documentation Facebook des commentaires https://developers.facebook.com/docs/graph-api/reference/v2.8/object/comments
|
|
# - Aller chercher un access token pour Facebook http://stackoverflow.com/a/26844734
|
|
# - Python datetime: https://docs.python.org/2/library/datetime.html
|
|
# - Python FB Pagination https://stackoverflow.com/questions/28589239/python-facebook-api-cursor-pagination
|
|
|
|
# In[1]:
|
|
|
|
import os
from datetime import datetime
from datetime import timedelta

import facebook
import requests
from elasticsearch import Elasticsearch
|
|
|
|
|
|
# In[2]:
|
|
|
|
# Facebook app credentials are read from environment variables so secrets
# never live in the notebook itself. Both must be set before running.
app_id=os.environ['FB_APP_ID']
app_secret=os.environ['FB_APP_SECRET']
|
|
|
|
|
|
# In[3]:
|
|
|
|
def get_fb_token(app_id, app_secret):
    """Exchange an app id/secret for a Graph API app access token.

    Parameters
    ----------
    app_id, app_secret : str
        Facebook application credentials.

    Returns the access token string. Raises requests.HTTPError when
    Facebook rejects the request (e.g. bad credentials) instead of
    returning a slice of the error page, as the old blind
    ``text.split("=")[1]`` did.
    """
    payload = {'grant_type': 'client_credentials',
               'client_id': app_id,
               'client_secret': app_secret}
    resp = requests.post('https://graph.facebook.com/oauth/access_token',
                         params=payload)
    resp.raise_for_status()  # fail loudly instead of parsing an error page
    try:
        # Graph API v2.3+ answers with JSON: {"access_token": "...", ...}
        return resp.json()['access_token']
    except ValueError:
        # Pre-v2.3 endpoints answered with the form "access_token=<token>"
        return resp.text.split('=')[1]
|
|
|
|
|
|
# In[4]:
|
|
|
|
# Obtain the app access token once at startup; reused for all Graph API calls.
access_token=get_fb_token(app_id,app_secret)
|
|
|
|
|
|
# In[5]:
|
|
|
|
# Look-back window: only posts from the last seven days are fetched.
week_delta = timedelta(weeks=1)
|
|
|
|
|
|
# In[6]:
|
|
|
|
# Connect to a local Elasticsearch instance (default http://localhost:9200).
es = Elasticsearch()


# Create the target index up front; ignore=400 makes an already-existing
# index a no-op instead of an error.
es.indices.create(index='fb_page_post', ignore=400)
|
|
|
|
|
|
# In[10]:
|
|
|
|
#es.indices.delete(index='fb_page_post')
|
|
|
|
|
|
# In[18]:
|
|
|
|
# Graph API client pinned to API version 2.7.
graph = facebook.GraphAPI(access_token=access_token, version='2.7')


# Resolve the target page by its vanity name; the returned object
# carries the numeric page id used by the fetch helpers below.
page_obj = graph.get_object('PartiConservateurDuQuebec')
|
|
|
|
|
|
# In[12]:
|
|
|
|
def index_post(post):
    """Persist one page post as a 'fb_page_post_data' document in Elasticsearch."""
    metadata = {
        "index": "fb_page_post",
        "doc_type": "fb_page_post_data",
        "id": post[u'id'],
        "timestamp": post[u'created_time'],
    }
    es.index(body=post, **metadata)
|
|
def index_comments(post):
    """Persist one comment as a 'fb_page_post_comment' document in Elasticsearch."""
    metadata = {
        "index": "fb_page_post",
        "doc_type": "fb_page_post_comment",
        "id": post[u'id'],
        "timestamp": post[u'created_time'],
    }
    es.index(body=post, **metadata)
|
|
def index_like(post):
    """Persist one like as a 'fb_page_post_like' document in Elasticsearch.

    Expects the synthetic 'like_id' / 'like_time' keys added by the caller
    (the post id + user id concatenation and the post's created_time).
    """
    metadata = {
        "index": "fb_page_post",
        "doc_type": "fb_page_post_like",
        "id": post[u'like_id'],
        "timestamp": post[u'like_time'],
    }
    es.index(body=post, **metadata)
|
|
|
|
|
|
# In[13]:
|
|
|
|
def getfbpostsfrompage(fb_graph, page_id, field_list, time_since, time_until):
    """Return every post published by *page_id* between two dates.

    Parameters
    ----------
    fb_graph : facebook.GraphAPI
        Authenticated Graph API client.
    page_id : str
        Numeric id or vanity name of the page.
    field_list : list of str
        Post fields to request, e.g. ['id', 'created_time'].
    time_since, time_until : str
        ISO-format dates bounding the query window.

    Follows Graph API cursor pagination until no 'next' link remains.
    Unlike the previous broad ``except KeyError`` loop, an error payload
    (one without a 'data' key) simply ends iteration explicitly instead
    of relying on exceptions for control flow.
    """
    path = ('/' + page_id +
            '/posts?fields=' + ','.join(field_list) +
            '&since=' + time_since +
            '&until=' + time_until)
    res = fb_graph.get_object(path)
    all_posts = []
    while True:
        all_posts.extend(res.get(u'data', []))
        next_url = res.get(u'paging', {}).get(u'next')
        if not next_url:
            break  # last page reached
        res = requests.get(next_url).json()
    return all_posts
|
|
|
|
|
|
# In[ ]:
|
|
|
|
# TEST: fetch one week of post ids/timestamps from the page.
ttt = getfbpostsfrompage(graph,
    page_obj[u'id'],
    ['id','created_time'],
    (datetime.now().date()-week_delta).isoformat(),
    datetime.now().date().isoformat())


# TEST: inspect the fetched posts.
ttt
|
|
|
|
|
|
# In[14]:
|
|
|
|
def getpostmetacomplet(fb_graph, post_id, field_list):
    """Fetch a single Graph API object with the requested fields.

    fb_graph   -- authenticated facebook.GraphAPI client
    post_id    -- id of the post (or comment) to fetch
    field_list -- list of field names to request
    Returns whatever the Graph API returns for that object.
    """
    fields = ','.join(field_list)
    return fb_graph.get_object('/' + post_id + '?fields=' + fields)
|
|
|
|
|
|
# In[ ]:
|
|
|
|
# TEST: fetch the full metadata of the first post.
ppp_complet = getpostmetacomplet(graph,ttt[0][u'id'],['message','created_time','id','status_type','shares','link','via'])


# TEST: inspect it.
ppp_complet
|
|
|
|
|
|
# In[15]:
|
|
|
|
def getpostreact(fb_graph, post_id, field_list, react_type, filter_type):
    """Return every reaction attached to *post_id*.

    Parameters
    ----------
    fb_graph : facebook.GraphAPI
        Authenticated Graph API client.
    post_id : str
        Id of the post whose reactions are fetched.
    field_list : list of str
        Fields to request for each reaction.
    react_type : str
        Graph API edge name, e.g. 'comments' or 'likes'.
    filter_type : str
        Graph API filter, e.g. 'stream' for a flat list.

    Follows cursor pagination until the last page. The previous version
    wrapped both the data iteration and the pagination in one broad
    ``except KeyError``, silently swallowing malformed/error payloads;
    the end-of-pages condition is now an explicit 'next'-link check.
    """
    res = fb_graph.get_object('/' + post_id +
                              '/' + react_type + '/?fields=' + ','.join(field_list) +
                              '&filter=' + filter_type)
    all_reactions = []
    while True:
        all_reactions.extend(res.get(u'data', []))
        next_url = res.get(u'paging', {}).get(u'next')
        if not next_url:
            break  # final page consumed
        res = requests.get(next_url).json()
    return all_reactions
|
|
|
|
|
|
# In[16]:
|
|
|
|
def dict_update(l, x):
    """Merge mapping *x* into dict *l* in place and return *l*.

    Returning the mutated dict makes the helper usable inside
    comprehensions, unlike dict.update which returns None.
    """
    for key, value in x.items():
        l[key] = value
    return l
|
|
|
|
|
|
# In[ ]:
|
|
|
|
# TEST: fetch the comment ids of the first post.
ccc = getpostreact(graph,ttt[0][u'id'],['id'],'comments','stream')


# TEST: show a sample of comment ids.
[x[u'id'] for x in ccc[1:10]]


# TEST: fetch full metadata for a sample of comments.
[getpostmetacomplet(graph,x[u'id'],['id','from','message','created_time','comment_count','like_count','parent']) for x in ccc[1:10]]


# TEST: fetch the likes of the first post.
rrr = getpostreact(graph,ttt[0][u'id'],['id','name'],'likes','stream')


# TEST: id of the first liking user.
rrr[0]['id']


# TEST: a synthetic like id is "<post_id>_<user_id>".
ttt[0][u'id']+'_'+rrr[0]['id']


# TEST
rrr[0]['id']


# TEST: enrich each like with the post's timestamp and a synthetic id,
# matching what index_like expects.
like_dicts = [dict_update(x,{'like_time':ttt[0][u'created_time'],
    'like_id':ttt[0][u'id']+'_'+x['id']}) for x in rrr]


# TEST: sample of the enriched likes.
like_dicts[1:5]


# TEST: attach the likes to the post document.
ppp_complet.update({u'likes':rrr})


# TEST: inspect the final document.
ppp_complet
|
|
|
|
|
|
# In[22]:
|
|
|
|
# Fetch one week of posts (ids and timestamps only) to drive the
# indexing loop below.
res = getfbpostsfrompage(graph,
    page_obj[u'id'],
    ['id','created_time'],
    (datetime.now().date()-week_delta).isoformat(),
    datetime.now().date().isoformat())
|
|
|
|
|
|
# In[23]:
|
|
|
|
# For each post of the last week: fetch full metadata, likes and comments,
# then index everything into Elasticsearch.
for pp in res:
    # Full post metadata.
    post_complet = getpostmetacomplet(graph,
                                      pp[u'id'],
                                      ['message','created_time','id',
                                       'status_type','shares','link',
                                       'via'])
    # Every like on the post (paginated).
    all_post_likes = getpostreact(graph,pp[u'id'],
                                  ['id','name'],
                                  'likes',
                                  'stream')
    like_count = len(all_post_likes)
    post_complet.update({u'like_count':like_count})
    # Save the post document.
    index_post(post_complet)
    # Save the likes: each like gets the post's created_time as its
    # timestamp and a synthetic "<post_id>_<user_id>" id.
    like_dicts = [dict_update(x,{u'like_time':pp['created_time'],
                                 u'like_id':pp[u'id']+'_'+x['id']}) for x in all_post_likes]
    for l in like_dicts:
        index_like(l)
    # Every comment on the post (paginated), fetched in full then saved.
    res_comments = getpostreact(graph,pp[u'id'],['id'],'comments','stream')
    for cc in res_comments:
        comment_complet = getpostmetacomplet(graph,
                                             cc[u'id'],
                                             ['id','from','message',
                                              'created_time','comment_count','like_count',
                                              'parent'])
        # Save the comment document.
        index_comments(comment_complet)
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|