# coding: utf-8
|
|
|
|
# Ce code sert à extraire des publications d'une page Facebook et les enregistrer dans ElasticSearch
|
|
#
|
|
# Resources:
|
|
#
|
|
# - Facebook SDK pour Python https://github.com/mobolic/facebook-sdk
|
|
# - Documentation Facebook du feed https://developers.facebook.com/docs/graph-api/reference/v2.8/page/feed
|
|
# - Documentation Facebook des commentaires https://developers.facebook.com/docs/graph-api/reference/v2.8/object/comments
|
|
# - Aller chercher un access token pour Facebook http://stackoverflow.com/a/26844734
|
|
# - Python datetime: https://docs.python.org/2/library/datetime.html
|
|
# - Python FB Pagination https://stackoverflow.com/questions/28589239/python-facebook-api-cursor-pagination
|
|
|
|
# In[1]:
|
|
|
|
import os
from datetime import datetime
from datetime import timedelta

import facebook
import requests
from elasticsearch import Elasticsearch
|
|
|
|
|
|
# In[2]:
|
|
|
|
# Facebook app credentials are read from environment variables so secrets
# never live in the notebook itself. Both must be set before running.
app_id=os.environ['FB_APP_ID']
app_secret=os.environ['FB_APP_SECRET']
|
|
|
|
|
|
# In[3]:
|
|
|
|
def get_fb_token(app_id, app_secret):
    """Exchange an app id/secret for a Graph API app access token.

    Parameters
    ----------
    app_id, app_secret : str
        Facebook application credentials.

    Returns the access token string. Raises requests.HTTPError when
    Facebook rejects the request (e.g. bad credentials) instead of
    returning a slice of the error page, as the old blind
    ``text.split("=")[1]`` did.
    """
    payload = {'grant_type': 'client_credentials',
               'client_id': app_id,
               'client_secret': app_secret}
    resp = requests.post('https://graph.facebook.com/oauth/access_token',
                         params=payload)
    resp.raise_for_status()  # fail loudly instead of parsing an error page
    try:
        # Graph API v2.3+ answers with JSON: {"access_token": "...", ...}
        return resp.json()['access_token']
    except ValueError:
        # Pre-v2.3 endpoints answered with the form "access_token=<token>"
        return resp.text.split('=')[1]
|
|
|
|
|
|
# In[4]:
|
|
|
|
# Obtain the app access token once at startup; reused for all Graph API calls.
access_token=get_fb_token(app_id,app_secret)
|
|
|
|
|
|
# In[5]:
|
|
|
|
# Look-back window: only posts from the last seven days are fetched.
week_delta = timedelta(weeks=1)
|
|
|
|
|
|
# In[6]:
|
|
|
|
# Connect to a local Elasticsearch instance (default http://localhost:9200).
es = Elasticsearch()


# Create the target index up front; ignore=400 makes an already-existing
# index a no-op instead of an error.
es.indices.create(index='fb_page_post', ignore=400)
|
|
|
|
|
|
# In[10]:
|
|
|
|
#es.indices.delete(index='fb_page_post')
|
|
|
|
|
|
# In[18]:
|
|
|
|
# Graph API client pinned to API version 2.7.
graph = facebook.GraphAPI(access_token=access_token, version='2.7')


# Resolve the target page by its vanity name; the returned object
# carries the numeric page id used by the fetch helpers below.
page_obj = graph.get_object('PartiConservateurDuQuebec')
|
|
|
|
|
|
# In[12]:
|
|
|
|
def index_post(post):
    """Persist one page post as a 'fb_page_post_data' document in Elasticsearch."""
    metadata = {
        "index": "fb_page_post",
        "doc_type": "fb_page_post_data",
        "id": post[u'id'],
        "timestamp": post[u'created_time'],
    }
    es.index(body=post, **metadata)
|
|
def index_comments(post):
    """Persist one comment as a 'fb_page_post_comment' document in Elasticsearch."""
    metadata = {
        "index": "fb_page_post",
        "doc_type": "fb_page_post_comment",
        "id": post[u'id'],
        "timestamp": post[u'created_time'],
    }
    es.index(body=post, **metadata)
|
|
def index_like(post):
    """Persist one like as a 'fb_page_post_like' document in Elasticsearch.

    Expects the synthetic 'like_id' / 'like_time' keys added by the caller
    (the post id + user id concatenation and the post's created_time).
    """
    metadata = {
        "index": "fb_page_post",
        "doc_type": "fb_page_post_like",
        "id": post[u'like_id'],
        "timestamp": post[u'like_time'],
    }
    es.index(body=post, **metadata)
|
|
|
|
|
|
# In[13]:
|
|
|
|
def getfbpostsfrompage(fb_graph, page_id, field_list, time_since, time_until):
    """Return every post published by *page_id* between two dates.

    Parameters
    ----------
    fb_graph : facebook.GraphAPI
        Authenticated Graph API client.
    page_id : str
        Numeric id or vanity name of the page.
    field_list : list of str
        Post fields to request, e.g. ['id', 'created_time'].
    time_since, time_until : str
        ISO-format dates bounding the query window.

    Follows Graph API cursor pagination until no 'next' link remains.
    Unlike the previous broad ``except KeyError`` loop, an error payload
    (one without a 'data' key) simply ends iteration explicitly instead
    of relying on exceptions for control flow.
    """
    path = ('/' + page_id +
            '/posts?fields=' + ','.join(field_list) +
            '&since=' + time_since +
            '&until=' + time_until)
    res = fb_graph.get_object(path)
    all_posts = []
    while True:
        all_posts.extend(res.get(u'data', []))
        next_url = res.get(u'paging', {}).get(u'next')
        if not next_url:
            break  # last page reached
        res = requests.get(next_url).json()
    return all_posts
|
|
|
|
|
|
# In[ ]:
|
|
|
|
# TEST: fetch one week of post ids/timestamps from the page.
ttt = getfbpostsfrompage(graph,
    page_obj[u'id'],
    ['id','created_time'],
    (datetime.now().date()-week_delta).isoformat(),
    datetime.now().date().isoformat())


# TEST: inspect the fetched posts.
ttt
|
|
|
|
|
|
# In[14]:
|
|
|
|
def getpostmetacomplet(fb_graph, post_id, field_list):
    """Fetch a single Graph API object with the requested fields.

    fb_graph   -- authenticated facebook.GraphAPI client
    post_id    -- id of the post (or comment) to fetch
    field_list -- list of field names to request
    Returns whatever the Graph API returns for that object.
    """
    fields = ','.join(field_list)
    return fb_graph.get_object('/' + post_id + '?fields=' + fields)
|
|
|
|
|
|
# In[ ]:
|
|
|
|
# TEST: fetch the full metadata of the first post.
ppp_complet = getpostmetacomplet(graph,ttt[0][u'id'],['message','created_time','id','status_type','shares','link','via'])


# TEST: inspect it.
ppp_complet
|
|
|
|
|
|
# In[15]:
|
|
|
|
def getpostreact(fb_graph, post_id, field_list, react_type, filter_type):
    """Return every reaction attached to *post_id*.

    Parameters
    ----------
    fb_graph : facebook.GraphAPI
        Authenticated Graph API client.
    post_id : str
        Id of the post whose reactions are fetched.
    field_list : list of str
        Fields to request for each reaction.
    react_type : str
        Graph API edge name, e.g. 'comments' or 'likes'.
    filter_type : str
        Graph API filter, e.g. 'stream' for a flat list.

    Follows cursor pagination until the last page. The previous version
    wrapped both the data iteration and the pagination in one broad
    ``except KeyError``, silently swallowing malformed/error payloads;
    the end-of-pages condition is now an explicit 'next'-link check.
    """
    res = fb_graph.get_object('/' + post_id +
                              '/' + react_type + '/?fields=' + ','.join(field_list) +
                              '&filter=' + filter_type)
    all_reactions = []
    while True:
        all_reactions.extend(res.get(u'data', []))
        next_url = res.get(u'paging', {}).get(u'next')
        if not next_url:
            break  # final page consumed
        res = requests.get(next_url).json()
    return all_reactions
|
|
|
|
|
|
# In[16]:
|
|
|
|
def dict_update(l, x):
    """Merge mapping *x* into dict *l* in place and return *l*.

    Returning the mutated dict makes the helper usable inside
    comprehensions, unlike dict.update which returns None.
    """
    for key, value in x.items():
        l[key] = value
    return l
|
|
|
|
|
|
# In[ ]:
|
|
|
|
# TEST: fetch the comment ids of the first post.
ccc = getpostreact(graph,ttt[0][u'id'],['id'],'comments','stream')


# TEST: show a sample of comment ids.
[x[u'id'] for x in ccc[1:10]]


# TEST: fetch full metadata for a sample of comments.
[getpostmetacomplet(graph,x[u'id'],['id','from','message','created_time','comment_count','like_count','parent']) for x in ccc[1:10]]


# TEST: fetch the likes of the first post.
rrr = getpostreact(graph,ttt[0][u'id'],['id','name'],'likes','stream')


# TEST: id of the first liking user.
rrr[0]['id']


# TEST: a synthetic like id is "<post_id>_<user_id>".
ttt[0][u'id']+'_'+rrr[0]['id']


# TEST
rrr[0]['id']


# TEST: enrich each like with the post's timestamp and a synthetic id,
# matching what index_like expects.
like_dicts = [dict_update(x,{'like_time':ttt[0][u'created_time'],
    'like_id':ttt[0][u'id']+'_'+x['id']}) for x in rrr]


# TEST: sample of the enriched likes.
like_dicts[1:5]


# TEST: attach the likes to the post document.
ppp_complet.update({u'likes':rrr})


# TEST: inspect the final document.
ppp_complet
|
|
|
|
|
|
# In[22]:
|
|
|
|
# Fetch one week of posts (ids and timestamps only) to drive the
# indexing loop below.
res = getfbpostsfrompage(graph,
    page_obj[u'id'],
    ['id','created_time'],
    (datetime.now().date()-week_delta).isoformat(),
    datetime.now().date().isoformat())
|
|
|
|
|
|
# In[23]:
|
|
|
|
# For each post of the last week: fetch full metadata, likes and comments,
# then index everything into Elasticsearch.
for pp in res:
    # Full post metadata.
    post_complet = getpostmetacomplet(graph,
                                      pp[u'id'],
                                      ['message','created_time','id',
                                       'status_type','shares','link',
                                       'via'])
    # Every like on the post (paginated).
    all_post_likes = getpostreact(graph,pp[u'id'],
                                  ['id','name'],
                                  'likes',
                                  'stream')
    like_count = len(all_post_likes)
    post_complet.update({u'like_count':like_count})
    # Save the post document.
    index_post(post_complet)
    # Save the likes: each like gets the post's created_time as its
    # timestamp and a synthetic "<post_id>_<user_id>" id.
    like_dicts = [dict_update(x,{u'like_time':pp['created_time'],
                                 u'like_id':pp[u'id']+'_'+x['id']}) for x in all_post_likes]
    for l in like_dicts:
        index_like(l)
    # Every comment on the post (paginated), fetched in full then saved.
    res_comments = getpostreact(graph,pp[u'id'],['id'],'comments','stream')
    for cc in res_comments:
        comment_complet = getpostmetacomplet(graph,
                                             cc[u'id'],
                                             ['id','from','message',
                                              'created_time','comment_count','like_count',
                                              'parent'])
        # Save the comment document.
        index_comments(comment_complet)
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|