premiere version code fonctionnel
This commit is contained in:
commit
14cc4a39fa
2 changed files with 567 additions and 0 deletions
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
creds
|
||||
.ipynb_checkpoints
|
||||
.ipynb_checkpoints/*
|
||||
.Rhistory
|
563
facebook_app.ipynb
Normal file
563
facebook_app.ipynb
Normal file
|
@ -0,0 +1,563 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Ce code sert à extraire des publications d'une page Facebook et à les enregistrer dans Elasticsearch.\n",
|
||||
"\n",
|
||||
"Ressources: \n",
|
||||
"\n",
|
||||
"- Facebook SDK pour Python https://github.com/mobolic/facebook-sdk\n",
|
||||
"- Documentation Facebook du feed https://developers.facebook.com/docs/graph-api/reference/v2.8/page/feed\n",
|
||||
"- Documentation Facebook des commentaires https://developers.facebook.com/docs/graph-api/reference/v2.8/object/comments\n",
|
||||
"- Aller chercher un access token pour Facebook http://stackoverflow.com/a/26844734\n",
|
||||
"- Python datetime: https://docs.python.org/2/library/datetime.html\n",
|
||||
"- Python FB Pagination https://stackoverflow.com/questions/28589239/python-facebook-api-cursor-pagination"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"import facebook\n",
|
||||
"from elasticsearch import Elasticsearch\n",
|
||||
"from datetime import datetime\n",
|
||||
"from datetime import timedelta"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"app_id=''\n",
|
||||
"app_secret=''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_fb_token(app_id, app_secret): \n",
|
||||
" payload = {'grant_type': 'client_credentials', 'client_id': app_id, 'client_secret': app_secret}\n",
|
||||
" file = requests.post('https://graph.facebook.com/oauth/access_token?', params = payload)\n",
|
||||
" #print file.text #to test what the FB api responded with \n",
|
||||
" result = file.text.split(\"=\")[1]\n",
|
||||
" #print file.text #to test the TOKEN\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"access_token=get_fb_token(app_id,app_secret)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"week_delta = timedelta(days=7)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"es = Elasticsearch()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{u'acknowledged': True, u'shards_acknowledged': True}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"es.indices.create(index='fb_page_post', ignore=400)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#es.indices.delete(index='fb_page_post')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"graph = facebook.GraphAPI(access_token=access_token, version='2.7')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page_obj = graph.get_object('PartiConservateurDuQuebec')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def index_post(post):\n",
|
||||
" es.index(\n",
|
||||
" index=\"fb_page_post\",\n",
|
||||
" doc_type=\"fb_page_post_data\", \n",
|
||||
" id=post[u'id'],\n",
|
||||
" timestamp=post[u'created_time'],\n",
|
||||
" body=post)\n",
|
||||
"def index_comments(post):\n",
|
||||
" es.index(\n",
|
||||
" index=\"fb_page_post\",\n",
|
||||
" doc_type=\"fb_page_post_comment\", \n",
|
||||
" id=post[u'id'],\n",
|
||||
" timestamp=post[u'created_time'],\n",
|
||||
" body=post)\n",
|
||||
"def index_like(post):\n",
|
||||
" es.index(\n",
|
||||
" index=\"fb_page_post\",\n",
|
||||
" doc_type=\"fb_page_post_like\", \n",
|
||||
" id=post[u'like_id'],\n",
|
||||
" timestamp=post[u'like_time'],\n",
|
||||
" body=post)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def getfbpostsfrompage(fb_graph,page_id,field_list,time_since,time_until):\n",
|
||||
" all_posts = []\n",
|
||||
" res = fb_graph.get_object('/'+\n",
|
||||
" page_id+\n",
|
||||
" '/posts?fields='+','.join(field_list)+\n",
|
||||
" '&since='+time_since+\n",
|
||||
" '&until='+time_until)\n",
|
||||
" while(True):\n",
|
||||
" try:\n",
|
||||
" for page in res[u'data']:\n",
|
||||
" all_posts.append(page)\n",
|
||||
" res=requests.get(res['paging']['next']).json()\n",
|
||||
" except KeyError:\n",
|
||||
" break\n",
|
||||
" return all_posts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"ttt = getfbpostsfrompage(graph,\n",
|
||||
" page_obj[u'id'],\n",
|
||||
" ['id','created_time'],\n",
|
||||
" (datetime.now().date()-week_delta).isoformat(),\n",
|
||||
" datetime.now().date().isoformat())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"ttt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def getpostmetacomplet(fb_graph,post_id,field_list):\n",
|
||||
" post_meta_complet = fb_graph.get_object('/'+\n",
|
||||
" post_id+\n",
|
||||
" '?fields='+','.join(field_list))\n",
|
||||
" return post_meta_complet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"ppp_complet = getpostmetacomplet(graph,ttt[0][u'id'],['message','created_time','id','status_type','shares','link','via'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"ppp_complet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def getpostreact(fb_graph,post_id,field_list,react_type,filter_type):\n",
|
||||
" res = fb_graph.get_object('/'+post_id+\n",
|
||||
" '/'+react_type+'/?fields='+','.join(field_list)+\n",
|
||||
" '&filter='+filter_type)\n",
|
||||
" all_comments = []\n",
|
||||
" while(True):\n",
|
||||
" try:\n",
|
||||
" for comment in res[u'data']:\n",
|
||||
" all_comments.append(comment)\n",
|
||||
" res=requests.get(res[u'paging'][u'next']).json()\n",
|
||||
" except KeyError:\n",
|
||||
" break\n",
|
||||
" return all_comments"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def dict_update(l,x):\n",
|
||||
" l.update(x)\n",
|
||||
" return l"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"ccc = getpostreact(graph,ttt[0][u'id'],['id'],'comments','stream')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"[x[u'id'] for x in ccc[1:10]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"[getpostmetacomplet(graph,x[u'id'],['id','from','message','created_time','comment_count','like_count','parent']) for x in ccc[1:10]] "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"rrr = getpostreact(graph,ttt[0][u'id'],['id','name'],'likes','stream')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"rrr[0]['id']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"ttt[0][u'id']+'_'+rrr[0]['id']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"rrr[0]['id']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"like_dicts = [dict_update(x,{'like_time':ttt[0][u'created_time'],\n",
|
||||
" 'like_id':ttt[0][u'id']+'_'+x['id']}) for x in rrr]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"like_dicts[1:5]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"ppp_complet.update({u'likes':rrr})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TEST\n",
|
||||
"ppp_complet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"res = getfbpostsfrompage(graph,\n",
|
||||
" page_obj[u'id'],\n",
|
||||
" ['id','created_time'],\n",
|
||||
" (datetime.now().date()-week_delta).isoformat(),\n",
|
||||
" datetime.now().date().isoformat())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for pp in res:\n",
|
||||
" # Post\n",
|
||||
" post_complet = getpostmetacomplet(graph,\n",
|
||||
" pp[u'id'],\n",
|
||||
" ['message','created_time','id',\n",
|
||||
" 'status_type','shares','link',\n",
|
||||
" 'via'])\n",
|
||||
" # Like\n",
|
||||
" all_post_likes = getpostreact(graph,pp[u'id'],\n",
|
||||
" ['id','name'],\n",
|
||||
" 'likes',\n",
|
||||
" 'stream')\n",
|
||||
" like_count = len(all_post_likes)\n",
|
||||
" post_complet.update({u'like_count':like_count})\n",
|
||||
" # Sauvegarde des \"post\"\n",
|
||||
" index_post(post_complet)\n",
|
||||
" # Sauvegarde des \"like\"\n",
|
||||
" like_dicts = [dict_update(x,{u'like_time':pp['created_time'],\n",
|
||||
" u'like_id':pp[u'id']+'_'+x['id']}) for x in all_post_likes]\n",
|
||||
" for l in like_dicts:\n",
|
||||
" index_like(l)\n",
|
||||
" # Comments\n",
|
||||
" res_comments = getpostreact(graph,pp[u'id'],['id'],'comments','stream')\n",
|
||||
" for cc in res_comments:\n",
|
||||
" comment_complet = getpostmetacomplet(graph,\n",
|
||||
" cc[u'id'],\n",
|
||||
" ['id','from','message',\n",
|
||||
" 'created_time','comment_count','like_count',\n",
|
||||
" 'parent'])\n",
|
||||
" # Sauvegarde des \"comments\"\n",
|
||||
" index_comments(comment_complet)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anaconda-cloud": {},
|
||||
"kernelspec": {
|
||||
"display_name": "Python [default]",
|
||||
"language": "python",
|
||||
"name": "python2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
Loading…
Reference in a new issue