From 30afc79e6c7fb7b1a8a483f6ec173c64a8994275 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Pelletier?= Date: Sun, 17 Dec 2017 01:02:37 -0500 Subject: [PATCH] Initial commit --- .ipynb_checkpoints/BLS-checkpoint.ipynb | 597 +++++++++++++++++ ...ping Environnement Canada-checkpoint.ipynb | 238 +++++++ BLS.ipynb | 633 ++++++++++++++++++ Scraping Environnement Canada.ipynb | 261 ++++++++ 4 files changed, 1729 insertions(+) create mode 100644 .ipynb_checkpoints/BLS-checkpoint.ipynb create mode 100644 .ipynb_checkpoints/Scraping Environnement Canada-checkpoint.ipynb create mode 100644 BLS.ipynb create mode 100644 Scraping Environnement Canada.ipynb diff --git a/.ipynb_checkpoints/BLS-checkpoint.ipynb b/.ipynb_checkpoints/BLS-checkpoint.ipynb new file mode 100644 index 0000000..19a5fe6 --- /dev/null +++ b/.ipynb_checkpoints/BLS-checkpoint.ipynb @@ -0,0 +1,597 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "import prettytable" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "headers = {'Content-type': 'application/json'}\n", + "data = json.dumps({\"seriesid\": ['CUUR0000SA0','SUUR0000SA0'],\"startyear\":\"2011\", \"endyear\":\"2014\"})\n", + "p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "json_data = json.loads(p.text)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Results': {'series': [{'data': [{'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '134.207',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 
'periodName': 'November',\n", + " 'value': '135.107',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '135.891',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '136.211',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '136.127',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '136.392',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '136.433',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '136.216',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '135.771',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '135.375',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '134.542',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '134.017',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '133.509',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '133.596',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '133.876',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '134.255',\n", + " 
'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '134.098',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '133.919',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '133.900',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '133.626',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '133.421',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '133.558',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '133.204',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '132.137',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '131.770',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '132.208',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '132.892',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '132.988',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '132.430',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '131.731',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 
'M06',\n", + " 'periodName': 'June',\n", + " 'value': '131.956',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '132.154',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '132.284',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '131.905',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '130.953',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '130.438',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '129.844',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '130.196',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '130.373',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '130.635',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '130.351',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '129.983',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '129.846',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '129.999',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '129.483',\n", 
+ " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '128.585',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '127.363',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '126.778',\n", + " 'year': '2011'}],\n", + " 'seriesID': 'SUUR0000SA0'},\n", + " {'data': [{'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '234.812',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '236.151',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '237.433',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '238.031',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '237.852',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '238.250',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '238.343',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '237.900',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '237.072',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '236.293',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '234.781',\n", + " 'year': 
'2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '233.916',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '233.049',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '233.069',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '233.546',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '234.149',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '233.877',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '233.596',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '233.504',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '232.945',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '232.531',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '232.773',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '232.166',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '230.280',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '229.601',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 
'M11',\n", + " 'periodName': 'November',\n", + " 'value': '230.221',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '231.317',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '231.407',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '230.379',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '229.104',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '229.478',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '229.815',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '230.085',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '229.392',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '227.663',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '226.665',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '225.672',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '226.230',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '226.421',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': 
'226.889',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '226.545',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '225.922',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '225.722',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '225.964',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '224.906',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '223.467',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '221.309',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '220.223',\n", + " 'year': '2011'}],\n", + " 'seriesID': 'CUUR0000SA0'}]},\n", + " 'message': [],\n", + " 'responseTime': 73,\n", + " 'status': 'REQUEST_SUCCEEDED'}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "json_data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "for series in json_data['Results']['series']:\n", + " x=prettytable.PrettyTable([\"series id\",\"year\",\"period\",\"value\",\"footnotes\"])\n", + " seriesId = series['seriesID']\n", + " for item in series['data']:\n", + " year = item['year']\n", + " period = item['period']\n", + " value = item['value']\n", + " footnotes=\"\"\n", + " for footnote in item['footnotes']:\n", + " if footnote:\n", + " footnotes = footnotes + footnote['text'] + ','\n", + " if 'M01' <= period <= 'M12':\n", + " 
x.add_row([seriesId,year,period,value,footnotes[0:-1]])\n", + " output = open(seriesId + '.txt','w')\n", + " output.write (x.get_string())\n", + " output.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/.ipynb_checkpoints/Scraping Environnement Canada-checkpoint.ipynb b/.ipynb_checkpoints/Scraping Environnement Canada-checkpoint.ipynb new file mode 100644 index 0000000..f09ab66 --- /dev/null +++ b/.ipynb_checkpoints/Scraping Environnement Canada-checkpoint.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Web scraping avec Python\n", + "## RoboBrowser\n", + "\n", + "- RoboBrowser est une librairie python qui permet de simuler un comportement de navigation dans un navigateur web.\n", + "- Elle permet aussi d'extraire des éléments précis d'information d'une page et de les structurer. \n", + "- C'est la plus populaire qui combine les avantages de `beautifulsoup` avec la flexibilité de `requests`. 
\n", + "- Elle est disponible pour Python 2.7 et Python 3.6" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "from robobrowser import RoboBrowser" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On crée un objet navigateur et on navigue vers une première URL" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "browser = RoboBrowser(history=True,parser=\"lxml\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "browser.open('https://meteo.gc.ca/')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ouverture d'un formulaire à partir d'une de ses propriétés (ici, son identifiant unique)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "form = browser.get_form(id=\"cityjump\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "En affichant le formulaire, on peut voir les différents champs disponibles, ainsi que les valeurs par défaut qui sont attribuées, si applicable" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "form" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "En utilisant la propriété `value`, on peut remplir le formulaire" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "form['city'].value = 'Lévis'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On envoie ensuite le formulaire, comme si on cliquait sur le bouton d'envoi" + ] + }, + { + 
"cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "browser.submit_form(form)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On obtient le résultat de la page suivante dans le navigateur. À partir de ce résultat, on peut extraire différents éléments en utilisant le sélecteur `CSS`." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "location_xml = browser. \\\n", + "select('.col-sm-10')[0]. \\\n", + "select('dl.mrgn-bttm-0')[0]. \\\n", + "select('dd.mrgn-bttm-0')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lorsqu'un motif se répète, on préfère alors créer une fonction qui va extraire l'élément selon des paramètres. La librairie RoboBrowser ne gère pas les noeuds enfants de la structure XML (sélecteur `nth_child()`), mais on peut utiliser les listes de Python pour répliquer un comportement similaire." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "def temperature_xml(jour): \n", + " return browser. \\\n", + " select('div.div-column')[jour]. \\\n", + " select('div')[1]. \\\n", + " select('p.mrgn-bttm-0')[0]. \\\n", + " select('span')[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On peut assembler les données extraites dans un dictionnaire python et les utiliser dans son application." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'city': 'Beauport',\n", + " 'date': '16h00 HNE le vendredi 8 décembre 2017',\n", + " 'temperature': ['-2°C\\n', '-4°C\\n', '-13°C\\n', '-9°C\\n', '-8°C\\n', '-11°C\\n']}" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "{'city': location_xml[0].text, \n", + " 'date': location_xml[1].text,\n", + " 'temperature': [temperature_xml(i).text for i in range(1,7)]}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/BLS.ipynb b/BLS.ipynb new file mode 100644 index 0000000..9aa47e9 --- /dev/null +++ b/BLS.ipynb @@ -0,0 +1,633 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "import prettytable" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "headers = {'Content-type': 'application/json'}\n", + "data = json.dumps({\"seriesid\": ['CUUR0000SA0','SUUR0000SA0'],\"startyear\":\"2011\", \"endyear\":\"2014\"})\n", + "p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Date': 'Sat, 09 Dec 2017 18:37:00 GMT', 
'Strict-Transport-Security': 'max-age=31536000; includeSubdomains;', 'Set-Cookie': 'JSESSIONID=221353A17A38C985D55C35C2FE5AF9D3; Version=1; Path=\"/publicAPI/\"; Secure; HttpOnly', 'Content-Type': 'application/json', 'Content-Length': '8624', 'Keep-Alive': 'timeout=5, max=100', 'Connection': 'Keep-Alive'}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p.headers" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'{\"status\":\"REQUEST_SUCCEEDED\",\"responseTime\":70,\"message\":[],\"Results\":{\\n\"series\":\\n[{\"seriesID\":\"SUUR0000SA0\",\"data\":[{\"year\":\"2014\",\"period\":\"M12\",\"periodName\":\"December\",\"value\":\"134.207\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M11\",\"periodName\":\"November\",\"value\":\"135.107\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M10\",\"periodName\":\"October\",\"value\":\"135.891\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M09\",\"periodName\":\"September\",\"value\":\"136.211\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M08\",\"periodName\":\"August\",\"value\":\"136.127\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M07\",\"periodName\":\"July\",\"value\":\"136.392\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M06\",\"periodName\":\"June\",\"value\":\"136.433\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M05\",\"periodName\":\"May\",\"value\":\"136.216\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M04\",\"periodName\":\"April\",\"value\":\"135.771\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M03\",\"periodName\":\"March\",\"value\":\"135.375\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M02\",\"periodName\":\"February\",\"value\":\"134.542\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M01\",\"periodName\":\"January\",\"value\":\"134.017\",\"footnotes\":[{}]},{\"year\":\"2013\",\"peri
od\":\"M12\",\"periodName\":\"December\",\"value\":\"133.509\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M11\",\"periodName\":\"November\",\"value\":\"133.596\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M10\",\"periodName\":\"October\",\"value\":\"133.876\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M09\",\"periodName\":\"September\",\"value\":\"134.255\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M08\",\"periodName\":\"August\",\"value\":\"134.098\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M07\",\"periodName\":\"July\",\"value\":\"133.919\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M06\",\"periodName\":\"June\",\"value\":\"133.900\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M05\",\"periodName\":\"May\",\"value\":\"133.626\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M04\",\"periodName\":\"April\",\"value\":\"133.421\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M03\",\"periodName\":\"March\",\"value\":\"133.558\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M02\",\"periodName\":\"February\",\"value\":\"133.204\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M01\",\"periodName\":\"January\",\"value\":\"132.137\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M12\",\"periodName\":\"December\",\"value\":\"131.770\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M11\",\"periodName\":\"November\",\"value\":\"132.208\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M10\",\"periodName\":\"October\",\"value\":\"132.892\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M09\",\"periodName\":\"September\",\"value\":\"132.988\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M08\",\"periodName\":\"August\",\"value\":\"132.430\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M07\",\"periodName\":\"July\",\"value\":\"131.731\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M06\",\"periodName\":\"June\",\"value\":\"131.956\",\"footnotes\":[{}]},{\"ye
ar\":\"2012\",\"period\":\"M05\",\"periodName\":\"May\",\"value\":\"132.154\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M04\",\"periodName\":\"April\",\"value\":\"132.284\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M03\",\"periodName\":\"March\",\"value\":\"131.905\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M02\",\"periodName\":\"February\",\"value\":\"130.953\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M01\",\"periodName\":\"January\",\"value\":\"130.438\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M12\",\"periodName\":\"December\",\"value\":\"129.844\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M11\",\"periodName\":\"November\",\"value\":\"130.196\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M10\",\"periodName\":\"October\",\"value\":\"130.373\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M09\",\"periodName\":\"September\",\"value\":\"130.635\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M08\",\"periodName\":\"August\",\"value\":\"130.351\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M07\",\"periodName\":\"July\",\"value\":\"129.983\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M06\",\"periodName\":\"June\",\"value\":\"129.846\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M05\",\"periodName\":\"May\",\"value\":\"129.999\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M04\",\"periodName\":\"April\",\"value\":\"129.483\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M03\",\"periodName\":\"March\",\"value\":\"128.585\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M02\",\"periodName\":\"February\",\"value\":\"127.363\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M01\",\"periodName\":\"January\",\"value\":\"126.778\",\"footnotes\":[{}]}]},\\n{\"seriesID\":\"CUUR0000SA0\",\"data\":[{\"year\":\"2014\",\"period\":\"M12\",\"periodName\":\"December\",\"value\":\"234.812\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M11\",\"periodName
\":\"November\",\"value\":\"236.151\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M10\",\"periodName\":\"October\",\"value\":\"237.433\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M09\",\"periodName\":\"September\",\"value\":\"238.031\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M08\",\"periodName\":\"August\",\"value\":\"237.852\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M07\",\"periodName\":\"July\",\"value\":\"238.250\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M06\",\"periodName\":\"June\",\"value\":\"238.343\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M05\",\"periodName\":\"May\",\"value\":\"237.900\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M04\",\"periodName\":\"April\",\"value\":\"237.072\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M03\",\"periodName\":\"March\",\"value\":\"236.293\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M02\",\"periodName\":\"February\",\"value\":\"234.781\",\"footnotes\":[{}]},{\"year\":\"2014\",\"period\":\"M01\",\"periodName\":\"January\",\"value\":\"233.916\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M12\",\"periodName\":\"December\",\"value\":\"233.049\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M11\",\"periodName\":\"November\",\"value\":\"233.069\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M10\",\"periodName\":\"October\",\"value\":\"233.546\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M09\",\"periodName\":\"September\",\"value\":\"234.149\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M08\",\"periodName\":\"August\",\"value\":\"233.877\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M07\",\"periodName\":\"July\",\"value\":\"233.596\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M06\",\"periodName\":\"June\",\"value\":\"233.504\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M05\",\"periodName\":\"May\",\"value\":\"232.945\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M04
\",\"periodName\":\"April\",\"value\":\"232.531\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M03\",\"periodName\":\"March\",\"value\":\"232.773\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M02\",\"periodName\":\"February\",\"value\":\"232.166\",\"footnotes\":[{}]},{\"year\":\"2013\",\"period\":\"M01\",\"periodName\":\"January\",\"value\":\"230.280\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M12\",\"periodName\":\"December\",\"value\":\"229.601\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M11\",\"periodName\":\"November\",\"value\":\"230.221\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M10\",\"periodName\":\"October\",\"value\":\"231.317\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M09\",\"periodName\":\"September\",\"value\":\"231.407\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M08\",\"periodName\":\"August\",\"value\":\"230.379\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M07\",\"periodName\":\"July\",\"value\":\"229.104\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M06\",\"periodName\":\"June\",\"value\":\"229.478\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M05\",\"periodName\":\"May\",\"value\":\"229.815\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M04\",\"periodName\":\"April\",\"value\":\"230.085\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M03\",\"periodName\":\"March\",\"value\":\"229.392\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M02\",\"periodName\":\"February\",\"value\":\"227.663\",\"footnotes\":[{}]},{\"year\":\"2012\",\"period\":\"M01\",\"periodName\":\"January\",\"value\":\"226.665\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M12\",\"periodName\":\"December\",\"value\":\"225.672\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M11\",\"periodName\":\"November\",\"value\":\"226.230\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M10\",\"periodName\":\"October\",\"value\":\"226.421\",\"footnotes\":[{}]},{\"year\":\"2
011\",\"period\":\"M09\",\"periodName\":\"September\",\"value\":\"226.889\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M08\",\"periodName\":\"August\",\"value\":\"226.545\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M07\",\"periodName\":\"July\",\"value\":\"225.922\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M06\",\"periodName\":\"June\",\"value\":\"225.722\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M05\",\"periodName\":\"May\",\"value\":\"225.964\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M04\",\"periodName\":\"April\",\"value\":\"224.906\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M03\",\"periodName\":\"March\",\"value\":\"223.467\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M02\",\"periodName\":\"February\",\"value\":\"221.309\",\"footnotes\":[{}]},{\"year\":\"2011\",\"period\":\"M01\",\"periodName\":\"January\",\"value\":\"220.223\",\"footnotes\":[{}]}]}]\\n}}'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "p.text" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "json_data = json.loads(p.text)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Results': {'series': [{'data': [{'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '134.207',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '135.107',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '135.891',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '136.211',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 
'periodName': 'August',\n", + " 'value': '136.127',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '136.392',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '136.433',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '136.216',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '135.771',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '135.375',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '134.542',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '134.017',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '133.509',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '133.596',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '133.876',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '134.255',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '134.098',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '133.919',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '133.900',\n", + " 'year': 
'2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '133.626',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '133.421',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '133.558',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '133.204',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '132.137',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '131.770',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '132.208',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '132.892',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '132.988',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '132.430',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '131.731',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '131.956',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '132.154',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '132.284',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 
'periodName': 'March',\n", + " 'value': '131.905',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '130.953',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '130.438',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '129.844',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '130.196',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '130.373',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '130.635',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '130.351',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '129.983',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '129.846',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '129.999',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '129.483',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '128.585',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '127.363',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '126.778',\n", + " 
'year': '2011'}],\n", + " 'seriesID': 'SUUR0000SA0'},\n", + " {'data': [{'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '234.812',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '236.151',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '237.433',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '238.031',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '237.852',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '238.250',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '238.343',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '237.900',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '237.072',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '236.293',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '234.781',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '233.916',\n", + " 'year': '2014'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '233.049',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '233.069',\n", + " 'year': 
'2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '233.546',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '234.149',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '233.877',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '233.596',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '233.504',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '232.945',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '232.531',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '232.773',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '232.166',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '230.280',\n", + " 'year': '2013'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '229.601',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '230.221',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '231.317',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '231.407',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 
'M08',\n", + " 'periodName': 'August',\n", + " 'value': '230.379',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '229.104',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': '229.478',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '229.815',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '230.085',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '229.392',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '227.663',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '226.665',\n", + " 'year': '2012'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M12',\n", + " 'periodName': 'December',\n", + " 'value': '225.672',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M11',\n", + " 'periodName': 'November',\n", + " 'value': '226.230',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M10',\n", + " 'periodName': 'October',\n", + " 'value': '226.421',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M09',\n", + " 'periodName': 'September',\n", + " 'value': '226.889',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M08',\n", + " 'periodName': 'August',\n", + " 'value': '226.545',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M07',\n", + " 'periodName': 'July',\n", + " 'value': '225.922',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M06',\n", + " 'periodName': 'June',\n", + " 'value': 
'225.722',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M05',\n", + " 'periodName': 'May',\n", + " 'value': '225.964',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M04',\n", + " 'periodName': 'April',\n", + " 'value': '224.906',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M03',\n", + " 'periodName': 'March',\n", + " 'value': '223.467',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M02',\n", + " 'periodName': 'February',\n", + " 'value': '221.309',\n", + " 'year': '2011'},\n", + " {'footnotes': [{}],\n", + " 'period': 'M01',\n", + " 'periodName': 'January',\n", + " 'value': '220.223',\n", + " 'year': '2011'}],\n", + " 'seriesID': 'CUUR0000SA0'}]},\n", + " 'message': [],\n", + " 'responseTime': 70,\n", + " 'status': 'REQUEST_SUCCEEDED'}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "json_data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "for series in json_data['Results']['series']:\n", + " x=prettytable.PrettyTable([\"series id\",\"year\",\"period\",\"value\",\"footnotes\"])\n", + " seriesId = series['seriesID']\n", + " for item in series['data']:\n", + " year = item['year']\n", + " period = item['period']\n", + " value = item['value']\n", + " footnotes=\"\"\n", + " for footnote in item['footnotes']:\n", + " if footnote:\n", + " footnotes = footnotes + footnote['text'] + ','\n", + " if 'M01' <= period <= 'M12':\n", + " x.add_row([seriesId,year,period,value,footnotes[0:-1]])\n", + " output = open(seriesId + '.txt','w')\n", + " output.write (x.get_string())\n", + " output.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + 
}, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Scraping Environnement Canada.ipynb b/Scraping Environnement Canada.ipynb new file mode 100644 index 0000000..bd613a6 --- /dev/null +++ b/Scraping Environnement Canada.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Web scraping avec Python\n", + "## RoboBrowser\n", + "\n", + "- RoboBrowser est une librairie python qui permet de simuler un comportement de navigation dans un navigateur web.\n", + "- Elle permet aussi d'extraire des éléments précis d'information d'une page et de les structurer. \n", + "- C'est la plus populaire qui combine les avantages de `beautifulsoup` avec la flexibilité de `requests`. \n", + "- Elle est disponible pour Python 2.7 et Python 3.6" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "from robobrowser import RoboBrowser" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On crée un objet navigateur et on navigue vers une première URL" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "browser = RoboBrowser(history=True,parser=\"lxml\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "browser.open('https://meteo.gc.ca/')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ouverture d'un formulaire à partir d'une de ses propriétés (ici, son identifiant unique)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "form = browser.get_form(id=\"cityjump\")" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "En affichant le formulaire, on peut voir les différents champs disponibles, ainsi que les valeurs par défaut qui sont attribuées, si applicable" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "form" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "En utilisant la propriété `value`, on peut remplir le formulaire" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "form['city'].value = 'Québec'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On envoie ensuite le formulaire, comme si on cliquait sur le bouton d'envoi" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "browser.submit_form(form)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'dl.mrgn-bttm-0'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"dl.mrgn-bttm-0\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On obtient le résultat de la page suivante dans le navigateur. À partir de ce résultat, on peut extraire différents éléments en utilisant le sélecteur `CSS`." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "location_xml = browser. \\\n", + "select('.col-sm-10')[0]. \\\n", + "select('dl.mrgn-bttm-0')[0]. \\\n", + "select('dd.mrgn-bttm-0')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lorsqu'un motif se répète, on préfère alors créer une fonction qui va extraire l'élément selon des paramètres. 
La librairie RoboBrowser ne gère pas les noeuds enfants de la structure XML (sélecteur `:nth-child()`), mais on peut utiliser les listes de Python pour répliquer un comportement similaire." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"div.div-column:nth-child(4) > div:nth-child(2) > p:nth-child(2) > span:nth-child(1)\"" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "def temperature_xml(jour): \n", + " return browser. \\\n", + " select('div.div-column')[jour]. \\\n", + " select('div')[1]. \\\n", + " select('p.mrgn-bttm-0')[0]. \\\n", + " select('span')[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On peut assembler les données extraites dans un dictionnaire Python et les utiliser dans son application." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'city': 'Aéroport int. 
Lesage de Québec',\n", + " 'date': '13h00 HNE le samedi 9 décembre 2017',\n", + " 'temperature': ['-1°C\\n',\n", + " '-2°C\\n',\n", + " '-13°C\\n',\n", + " '-4°C\\n',\n", + " '-8°C\\n',\n", + " '-11°C\\n',\n", + " '-11°C\\n']}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "{'city': location_xml[0].text, \n", + " 'date': location_xml[1].text,\n", + " 'temperature': [temperature_xml(i).text for i in range(0,7)]}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}