script
This commit is contained in:
parent
14cc4a39fa
commit
93574518ff
1 changed files with 295 additions and 0 deletions
295
facebook_app.py
Normal file
295
facebook_app.py
Normal file
|
@ -0,0 +1,295 @@
|
|||
|
||||
# coding: utf-8
|
||||
|
||||
# Ce code sert à extraire des publications d'une page Facebook et les enregistrer dans ElasticSearch
|
||||
#
|
||||
# Resources:
|
||||
#
|
||||
# - Facebook SDK pour Python https://github.com/mobolic/facebook-sdk
|
||||
# - Documentation Facebook du feed https://developers.facebook.com/docs/graph-api/reference/v2.8/page/feed
|
||||
# - Documentation Facebook des commentaires https://developers.facebook.com/docs/graph-api/reference/v2.8/object/comments
|
||||
# - Aller chercher un access token pour Facebook http://stackoverflow.com/a/26844734
|
||||
# - Python datetime: https://docs.python.org/2/library/datetime.html
|
||||
# - Python FB Pagination https://stackoverflow.com/questions/28589239/python-facebook-api-cursor-pagination
|
||||
|
||||
# In[1]:
|
||||
|
||||
import os
from datetime import datetime
from datetime import timedelta

import requests

import facebook
from elasticsearch import Elasticsearch
|
||||
|
||||
|
||||
# In[2]:

# Facebook app credentials, read from the environment variables
# FB_APP_ID / FB_APP_SECRET (uses `os`, imported at the top of the file);
# raises KeyError at startup if either variable is missing.
app_id=os.environ['FB_APP_ID']
app_secret=os.environ['FB_APP_SECRET']
|
||||
|
||||
|
||||
# In[3]:
|
||||
|
||||
def get_fb_token(app_id, app_secret):
    """Fetch an app access token from Facebook's OAuth endpoint.

    Parameters
    ----------
    app_id, app_secret : str
        Credentials of the Facebook app.

    Returns
    -------
    str
        The app access token.

    Raises
    ------
    requests.HTTPError
        If Facebook answers with an HTTP error status.
    """
    payload = {'grant_type': 'client_credentials',
               'client_id': app_id,
               'client_secret': app_secret}
    response = requests.post('https://graph.facebook.com/oauth/access_token',
                             params=payload)
    # Fail loudly instead of trying to parse an error page as a token.
    response.raise_for_status()
    try:
        # Graph API >= v2.3 answers with JSON: {"access_token": "...", ...}
        return response.json()['access_token']
    except ValueError:
        # Legacy (pre-v2.3) endpoints answered with "access_token=<token>"
        return response.text.split("=")[1]
|
||||
|
||||
|
||||
# In[4]:

# Obtain an app access token once at startup.
access_token=get_fb_token(app_id,app_secret)


# In[5]:

# Collection window: posts from the last 7 days.
week_delta = timedelta(days=7)


# In[6]:

# Elasticsearch client with default settings (local node).
es = Elasticsearch()


# In[11]:

# Create the target index; ignore=400 makes this a no-op when the index
# already exists instead of raising.
es.indices.create(index='fb_page_post', ignore=400)


# In[10]:

#es.indices.delete(index='fb_page_post')


# In[18]:

# Graph API client pinned to API version 2.7.
graph = facebook.GraphAPI(access_token=access_token, version='2.7')


# In[19]:

# Facebook page whose feed will be harvested.
page_obj = graph.get_object('PartiConservateurDuQuebec')
# In[12]:

def _index_doc(doc_type, doc_id, timestamp, body):
    """Store one document in the fb_page_post index under the given doc_type."""
    es.index(
        index="fb_page_post",
        doc_type=doc_type,
        id=doc_id,
        timestamp=timestamp,
        body=body)

def index_post(post):
    """Index a page post; keyed by the post's Graph API id."""
    _index_doc("fb_page_post_data", post[u'id'], post[u'created_time'], post)

def index_comments(post):
    """Index a single comment; keyed by the comment's Graph API id."""
    _index_doc("fb_page_post_comment", post[u'id'], post[u'created_time'], post)

def index_like(post):
    """Index a like; expects the synthetic 'like_id'/'like_time' keys
    that the main loop adds via dict_update."""
    _index_doc("fb_page_post_like", post[u'like_id'], post[u'like_time'], post)
|
||||
|
||||
|
||||
# In[13]:
|
||||
|
||||
def getfbpostsfrompage(fb_graph,page_id,field_list,time_since,time_until):
    """Return every post of a page between two dates, following pagination.

    fb_graph    -- facebook.GraphAPI client
    page_id     -- Graph API id of the page
    field_list  -- post fields to request (joined with commas)
    time_since, time_until -- ISO date strings bounding the feed query
    """
    query = ('/' + page_id +
             '/posts?fields=' + ','.join(field_list) +
             '&since=' + time_since +
             '&until=' + time_until)
    res = fb_graph.get_object(query)
    all_posts = []
    while True:
        try:
            all_posts.extend(res[u'data'])
            # A missing 'paging'/'next' key raises KeyError and ends the walk.
            next_url = res['paging']['next']
            res = requests.get(next_url).json()
        except KeyError:
            break
    return all_posts
|
||||
|
||||
|
||||
# In[ ]:

#TEST
# Smoke test: ids/timestamps of the page's posts from the last week.
ttt = getfbpostsfrompage(graph,
                         page_obj[u'id'],
                         ['id','created_time'],
                         (datetime.now().date()-week_delta).isoformat(),
                         datetime.now().date().isoformat())


# In[ ]:

#TEST
# Display the fetched posts (notebook-style expression statement).
ttt
|
||||
|
||||
|
||||
# In[14]:
|
||||
|
||||
def getpostmetacomplet(fb_graph,post_id,field_list):
    """Fetch one Graph API object (post or comment) with the given fields.

    fb_graph   -- facebook.GraphAPI client
    post_id    -- Graph API id of the object
    field_list -- fields to request (joined with commas)
    """
    query = '/' + post_id + '?fields=' + ','.join(field_list)
    return fb_graph.get_object(query)
|
||||
|
||||
|
||||
# In[ ]:

#TEST
# Full metadata of the first post fetched above.
ppp_complet = getpostmetacomplet(graph,ttt[0][u'id'],['message','created_time','id','status_type','shares','link','via'])


# In[ ]:

#TEST
ppp_complet
|
||||
|
||||
|
||||
# In[15]:
|
||||
|
||||
def getpostreact(fb_graph,post_id,field_list,react_type,filter_type):
    """Return every reaction on a post, following pagination.

    fb_graph    -- facebook.GraphAPI client
    post_id     -- Graph API id of the post
    field_list  -- fields to request for each reaction
    react_type  -- edge name, e.g. 'comments' or 'likes'
    filter_type -- Graph API filter, e.g. 'stream'
    """
    query = ('/' + post_id +
             '/' + react_type + '/?fields=' + ','.join(field_list) +
             '&filter=' + filter_type)
    res = fb_graph.get_object(query)
    collected = []
    while True:
        try:
            collected.extend(res[u'data'])
            # A missing 'paging'/'next' key raises KeyError and ends the walk.
            next_url = res[u'paging'][u'next']
            res = requests.get(next_url).json()
        except KeyError:
            break
    return collected
|
||||
|
||||
|
||||
# In[16]:
|
||||
|
||||
def dict_update(l,x):
    """Merge mapping x into dict l IN PLACE and return l.

    Returning the mutated dict makes this usable inside comprehensions.
    """
    for key in x:
        l[key] = x[key]
    return l
|
||||
|
||||
|
||||
# In[ ]:

#TEST
# All comments on the first post (ids only).
ccc = getpostreact(graph,ttt[0][u'id'],['id'],'comments','stream')


# In[ ]:

#TEST
[x[u'id'] for x in ccc[1:10]]


# In[ ]:

#TEST
# Full metadata of a few comments.
[getpostmetacomplet(graph,x[u'id'],['id','from','message','created_time','comment_count','like_count','parent']) for x in ccc[1:10]]


# In[ ]:

#TEST
# All likes on the first post.
rrr = getpostreact(graph,ttt[0][u'id'],['id','name'],'likes','stream')


# In[ ]:

#TEST
rrr[0]['id']


# In[ ]:

#TEST
# Synthetic like id: <post_id>_<user_id>.
ttt[0][u'id']+'_'+rrr[0]['id']


# In[ ]:

#TEST
rrr[0]['id']


# In[ ]:

#TEST
# Enrich each like dict in place with a timestamp and a unique id.
like_dicts = [dict_update(x,{'like_time':ttt[0][u'created_time'],
                             'like_id':ttt[0][u'id']+'_'+x['id']}) for x in rrr]


# In[ ]:

#TEST
like_dicts[1:5]


# In[ ]:

#TEST
# Attach the likes to the post document.
ppp_complet.update({u'likes':rrr})


# In[ ]:

#TEST
ppp_complet
|
||||
|
||||
|
||||
# In[22]:

# Collect id/created_time of every post published during the last week.
res = getfbpostsfrompage(graph,
                         page_obj[u'id'],
                         ['id','created_time'],
                         (datetime.now().date()-week_delta).isoformat(),
                         datetime.now().date().isoformat())


# In[23]:

# Main harvesting loop: for each post, fetch the full post, its likes and
# its comments, then store everything in Elasticsearch.
for pp in res:
    # Post: full metadata for this post.
    post_complet = getpostmetacomplet(graph,
                                      pp[u'id'],
                                      ['message','created_time','id',
                                       'status_type','shares','link',
                                       'via'])
    # Like: every like on the post (paginated).
    all_post_likes = getpostreact(graph,pp[u'id'],
                                  ['id','name'],
                                  'likes',
                                  'stream')
    like_count = len(all_post_likes)
    post_complet.update({u'like_count':like_count})
    # Save the post
    index_post(post_complet)
    # Save the likes: dict_update mutates each like dict in place, adding
    # the post's timestamp and a synthetic id <post_id>_<user_id>.
    like_dicts = [dict_update(x,{u'like_time':pp['created_time'],
                                 u'like_id':pp[u'id']+'_'+x['id']}) for x in all_post_likes]
    for l in like_dicts:
        index_like(l)
    # Comments
    res_comments = getpostreact(graph,pp[u'id'],['id'],'comments','stream')
    for cc in res_comments:
        comment_complet = getpostmetacomplet(graph,
                                             cc[u'id'],
                                             ['id','from','message',
                                              'created_time','comment_count','like_count',
                                              'parent'])
        # Save the comments
        index_comments(comment_complet)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
Loading…
Reference in a new issue