diff --git a/craw_wti_crude_oil_wesite/oil_price.csv b/craw_wti_crude_oil_wesite/oil_price.csv new file mode 100644 index 0000000..9f6465f --- /dev/null +++ b/craw_wti_crude_oil_wesite/oil_price.csv @@ -0,0 +1,35 @@ +,Year,AverageClosing Price,Year Open,Year High,Year Low,Year Close,Annual% Change +0,2020,$55.05,$61.18,$63.27,$49.57,$53.38,-12.58% +1,2019,$57.05,$46.54,$66.30,$46.54,$61.06,34.46% +2,2018,$64.90,$60.37,$76.41,$42.53,$45.41,-24.84% +3,2017,$50.84,$52.33,$60.42,$42.53,$60.42,12.47% +4,2016,$43.58,$36.76,$54.06,$26.21,$53.72,45.03% +5,2015,$48.72,$52.72,$61.43,$34.73,$37.04,-30.70% +6,2014,$93.17,$95.14,$107.95,$53.45,$53.45,-45.55% +7,2013,$97.98,$93.14,$110.62,$86.65,$98.17,6.90% +8,2012,$94.05,$102.96,$109.39,$77.72,$91.83,-7.08% +9,2011,$94.88,$91.59,$113.39,$75.40,$98.83,8.15% +10,2010,$79.48,$81.52,$91.48,$64.78,$91.38,15.10% +11,2009,$61.95,$46.17,$81.03,$34.03,$79.39,78.00% +12,2008,$99.67,$99.64,$145.31,$30.28,$44.60,-53.52% +13,2007,$72.34,$60.77,$99.16,$50.51,$95.95,57.68% +14,2006,$66.05,$63.11,$77.05,$55.90,$60.85,-0.34% +15,2005,$56.64,$42.16,$69.91,$42.16,$61.06,40.82% +16,2004,$41.51,$33.71,$56.37,$32.49,$43.36,33.37% +17,2003,$31.08,$31.97,$37.96,$25.25,$32.51,4.17% +18,2002,$26.19,$21.13,$32.68,$18.02,$31.21,56.36% +19,2001,$25.98,$27.29,$32.21,$17.50,$19.96,-25.30% +20,2000,$30.38,$25.56,$37.22,$23.91,$26.72,3.73% +21,1999,$19.35,$12.42,$28.03,$11.38,$25.76,112.19% +22,1998,$14.42,$17.41,$17.93,$10.82,$12.14,-31.22% +23,1997,$20.61,$25.55,$26.55,$17.60,$17.65,-31.85% +24,1996,$22.12,$19.83,$26.55,$17.33,$25.90,32.55% +25,1995,$18.43,$17.45,$20.53,$16.86,$19.54,9.96% +26,1994,$17.20,$14.52,$20.72,$13.89,$17.77,25.23% +27,1993,$18.43,$19.03,$21.05,$13.98,$14.19,-27.19% +28,1992,$20.58,$19.43,$23.03,$17.89,$19.49,1.78% +29,1991,$21.54,$26.53,$32.25,$17.43,$19.15,-32.76% +30,1990,$24.53,$22.88,$41.07,$15.43,$28.48,30.40% +31,1989,$19.64,$17.38,$24.62,$16.99,$21.84,27.57% +32,1988,$15.97,$17.77,$18.54,$12.58,$17.12,2.27% +33,1987,$19.20,$18.13,$22.44,$15.12,$16.74,-6.64% diff --git a/craw_wti_crude_oil_wesite/oilprice.ipynb b/craw_wti_crude_oil_wesite/oilprice.ipynb new file mode 100644 index 0000000..b8ea85d --- /dev/null +++ b/craw_wti_crude_oil_wesite/oilprice.ipynb @@ -0,0 +1,2546 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 导入数据\n", + "`爬取网站`\n", + "https://www.bilibili.com/video/av83309821/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import requests.cookies\n", + "import json\n", + "import time\n", + "import pandas as pd\n", + "from bs4 import BeautifulSoup\n", + "\n", + "\n", + "url='https://www.macrotrends.net/2516/wti-crude-oil-prices-10-year-daily-chart'\n", + "res = requests.get(url)\n", + "\n", + "df = pd.read_html(res.text)\n", + "pan = pd.DataFrame(df[0])\n", + "d={'Year':pan.iloc[:,0],'AverageClosing Price':pan.iloc[:,1],'Year Open':pan.iloc[:,2],'Year High':pan.iloc[:,3],'Year Low':pan.iloc[:,4],'Year Close':pan.iloc[:,5],'Annual% Change':pan.iloc[:,6]}\n", + "df = pd.DataFrame(data=d)\n", + "df.to_csv('oil_price.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import requests.cookies\n", + "import json\n", + "import time\n", + "import pandas as pd\n", + "from bs4 import BeautifulSoup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "下载editcookies`www.editthiscookie.com download`" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "\n", + "url='https://www.macrotrends.net/2516/wti-crude-oil-prices-10-year-daily-chart'\n", + "res = requests.get(url)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "res = requests.get(url)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_html(res.text)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Crude Oil Prices - Historical Annual Data
YearAverageClosing PriceYear OpenYear HighYear LowYear CloseAnnual% Change
02020$55.05$61.18$63.27$49.57$53.38-12.58%
12019$57.05$46.54$66.30$46.54$61.0634.46%
22018$64.90$60.37$76.41$42.53$45.41-24.84%
32017$50.84$52.33$60.42$42.53$60.4212.47%
42016$43.58$36.76$54.06$26.21$53.7245.03%
52015$48.72$52.72$61.43$34.73$37.04-30.70%
62014$93.17$95.14$107.95$53.45$53.45-45.55%
72013$97.98$93.14$110.62$86.65$98.176.90%
82012$94.05$102.96$109.39$77.72$91.83-7.08%
92011$94.88$91.59$113.39$75.40$98.838.15%
102010$79.48$81.52$91.48$64.78$91.3815.10%
112009$61.95$46.17$81.03$34.03$79.3978.00%
122008$99.67$99.64$145.31$30.28$44.60-53.52%
132007$72.34$60.77$99.16$50.51$95.9557.68%
142006$66.05$63.11$77.05$55.90$60.85-0.34%
152005$56.64$42.16$69.91$42.16$61.0640.82%
162004$41.51$33.71$56.37$32.49$43.3633.37%
172003$31.08$31.97$37.96$25.25$32.514.17%
182002$26.19$21.13$32.68$18.02$31.2156.36%
192001$25.98$27.29$32.21$17.50$19.96-25.30%
202000$30.38$25.56$37.22$23.91$26.723.73%
211999$19.35$12.42$28.03$11.38$25.76112.19%
221998$14.42$17.41$17.93$10.82$12.14-31.22%
231997$20.61$25.55$26.55$17.60$17.65-31.85%
241996$22.12$19.83$26.55$17.33$25.9032.55%
251995$18.43$17.45$20.53$16.86$19.549.96%
261994$17.20$14.52$20.72$13.89$17.7725.23%
271993$18.43$19.03$21.05$13.98$14.19-27.19%
281992$20.58$19.43$23.03$17.89$19.491.78%
291991$21.54$26.53$32.25$17.43$19.15-32.76%
301990$24.53$22.88$41.07$15.43$28.4830.40%
311989$19.64$17.38$24.62$16.99$21.8427.57%
321988$15.97$17.77$18.54$12.58$17.122.27%
331987$19.20$18.13$22.44$15.12$16.74-6.64%
\n", + "
" + ], + "text/plain": [ + " Crude Oil Prices - Historical Annual Data \\\n", + " Year AverageClosing Price Year Open \n", + "0 2020 $55.05 $61.18 \n", + "1 2019 $57.05 $46.54 \n", + "2 2018 $64.90 $60.37 \n", + "3 2017 $50.84 $52.33 \n", + "4 2016 $43.58 $36.76 \n", + "5 2015 $48.72 $52.72 \n", + "6 2014 $93.17 $95.14 \n", + "7 2013 $97.98 $93.14 \n", + "8 2012 $94.05 $102.96 \n", + "9 2011 $94.88 $91.59 \n", + "10 2010 $79.48 $81.52 \n", + "11 2009 $61.95 $46.17 \n", + "12 2008 $99.67 $99.64 \n", + "13 2007 $72.34 $60.77 \n", + "14 2006 $66.05 $63.11 \n", + "15 2005 $56.64 $42.16 \n", + "16 2004 $41.51 $33.71 \n", + "17 2003 $31.08 $31.97 \n", + "18 2002 $26.19 $21.13 \n", + "19 2001 $25.98 $27.29 \n", + "20 2000 $30.38 $25.56 \n", + "21 1999 $19.35 $12.42 \n", + "22 1998 $14.42 $17.41 \n", + "23 1997 $20.61 $25.55 \n", + "24 1996 $22.12 $19.83 \n", + "25 1995 $18.43 $17.45 \n", + "26 1994 $17.20 $14.52 \n", + "27 1993 $18.43 $19.03 \n", + "28 1992 $20.58 $19.43 \n", + "29 1991 $21.54 $26.53 \n", + "30 1990 $24.53 $22.88 \n", + "31 1989 $19.64 $17.38 \n", + "32 1988 $15.97 $17.77 \n", + "33 1987 $19.20 $18.13 \n", + "\n", + " \n", + " Year High Year Low Year Close Annual% Change \n", + "0 $63.27 $49.57 $53.38 -12.58% \n", + "1 $66.30 $46.54 $61.06 34.46% \n", + "2 $76.41 $42.53 $45.41 -24.84% \n", + "3 $60.42 $42.53 $60.42 12.47% \n", + "4 $54.06 $26.21 $53.72 45.03% \n", + "5 $61.43 $34.73 $37.04 -30.70% \n", + "6 $107.95 $53.45 $53.45 -45.55% \n", + "7 $110.62 $86.65 $98.17 6.90% \n", + "8 $109.39 $77.72 $91.83 -7.08% \n", + "9 $113.39 $75.40 $98.83 8.15% \n", + "10 $91.48 $64.78 $91.38 15.10% \n", + "11 $81.03 $34.03 $79.39 78.00% \n", + "12 $145.31 $30.28 $44.60 -53.52% \n", + "13 $99.16 $50.51 $95.95 57.68% \n", + "14 $77.05 $55.90 $60.85 -0.34% \n", + "15 $69.91 $42.16 $61.06 40.82% \n", + "16 $56.37 $32.49 $43.36 33.37% \n", + "17 $37.96 $25.25 $32.51 4.17% \n", + "18 $32.68 $18.02 $31.21 56.36% \n", + "19 $32.21 $17.50 $19.96 -25.30% \n", + "20 $37.22 $23.91 $26.72 3.73% \n", + "21 $28.03 $11.38 $25.76 112.19% \n", + "22 $17.93 $10.82 $12.14 -31.22% \n", + "23 $26.55 $17.60 $17.65 -31.85% \n", + "24 $26.55 $17.33 $25.90 32.55% \n", + "25 $20.53 $16.86 $19.54 9.96% \n", + "26 $20.72 $13.89 $17.77 25.23% \n", + "27 $21.05 $13.98 $14.19 -27.19% \n", + "28 $23.03 $17.89 $19.49 1.78% \n", + "29 $32.25 $17.43 $19.15 -32.76% \n", + "30 $41.07 $15.43 $28.48 30.40% \n", + "31 $24.62 $16.99 $21.84 27.57% \n", + "32 $18.54 $12.58 $17.12 2.27% \n", + "33 $22.44 $15.12 $16.74 -6.64% " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3\n" + ] + } + ], + "source": [ + "print(len(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(type(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Link PreviewHTML Code (Click to Copy)
0WTI Crude Oil Prices - 10 Year Daily ChartNaN
1MacrotrendsNaN
2SourceNaN
\n", + "
" + ], + "text/plain": [ + " Link Preview HTML Code (Click to Copy)\n", + "0 WTI Crude Oil Prices - 10 Year Daily Chart NaN\n", + "1 Macrotrends NaN\n", + "2 Source NaN" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Link PreviewHTML Code (Click to Copy)
0WTI Crude Oil Prices - 10 Year Daily ChartNaN
1MacrotrendsNaN
2SourceNaN
\n", + "
" + ], + "text/plain": [ + " Link Preview HTML Code (Click to Copy)\n", + "0 WTI Crude Oil Prices - 10 Year Daily Chart NaN\n", + "1 Macrotrends NaN\n", + "2 Source NaN" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[2]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Crude Oil Prices - Historical Annual Data
YearAverageClosing PriceYear OpenYear HighYear LowYear CloseAnnual% Change
02020$55.05$61.18$63.27$49.57$53.38-12.58%
12019$57.05$46.54$66.30$46.54$61.0634.46%
22018$64.90$60.37$76.41$42.53$45.41-24.84%
32017$50.84$52.33$60.42$42.53$60.4212.47%
42016$43.58$36.76$54.06$26.21$53.7245.03%
52015$48.72$52.72$61.43$34.73$37.04-30.70%
62014$93.17$95.14$107.95$53.45$53.45-45.55%
72013$97.98$93.14$110.62$86.65$98.176.90%
82012$94.05$102.96$109.39$77.72$91.83-7.08%
92011$94.88$91.59$113.39$75.40$98.838.15%
102010$79.48$81.52$91.48$64.78$91.3815.10%
112009$61.95$46.17$81.03$34.03$79.3978.00%
122008$99.67$99.64$145.31$30.28$44.60-53.52%
132007$72.34$60.77$99.16$50.51$95.9557.68%
142006$66.05$63.11$77.05$55.90$60.85-0.34%
152005$56.64$42.16$69.91$42.16$61.0640.82%
162004$41.51$33.71$56.37$32.49$43.3633.37%
172003$31.08$31.97$37.96$25.25$32.514.17%
182002$26.19$21.13$32.68$18.02$31.2156.36%
192001$25.98$27.29$32.21$17.50$19.96-25.30%
202000$30.38$25.56$37.22$23.91$26.723.73%
211999$19.35$12.42$28.03$11.38$25.76112.19%
221998$14.42$17.41$17.93$10.82$12.14-31.22%
231997$20.61$25.55$26.55$17.60$17.65-31.85%
241996$22.12$19.83$26.55$17.33$25.9032.55%
251995$18.43$17.45$20.53$16.86$19.549.96%
261994$17.20$14.52$20.72$13.89$17.7725.23%
271993$18.43$19.03$21.05$13.98$14.19-27.19%
281992$20.58$19.43$23.03$17.89$19.491.78%
291991$21.54$26.53$32.25$17.43$19.15-32.76%
301990$24.53$22.88$41.07$15.43$28.4830.40%
311989$19.64$17.38$24.62$16.99$21.8427.57%
321988$15.97$17.77$18.54$12.58$17.122.27%
331987$19.20$18.13$22.44$15.12$16.74-6.64%
\n", + "
" + ], + "text/plain": [ + " Crude Oil Prices - Historical Annual Data \\\n", + " Year AverageClosing Price Year Open \n", + "0 2020 $55.05 $61.18 \n", + "1 2019 $57.05 $46.54 \n", + "2 2018 $64.90 $60.37 \n", + "3 2017 $50.84 $52.33 \n", + "4 2016 $43.58 $36.76 \n", + "5 2015 $48.72 $52.72 \n", + "6 2014 $93.17 $95.14 \n", + "7 2013 $97.98 $93.14 \n", + "8 2012 $94.05 $102.96 \n", + "9 2011 $94.88 $91.59 \n", + "10 2010 $79.48 $81.52 \n", + "11 2009 $61.95 $46.17 \n", + "12 2008 $99.67 $99.64 \n", + "13 2007 $72.34 $60.77 \n", + "14 2006 $66.05 $63.11 \n", + "15 2005 $56.64 $42.16 \n", + "16 2004 $41.51 $33.71 \n", + "17 2003 $31.08 $31.97 \n", + "18 2002 $26.19 $21.13 \n", + "19 2001 $25.98 $27.29 \n", + "20 2000 $30.38 $25.56 \n", + "21 1999 $19.35 $12.42 \n", + "22 1998 $14.42 $17.41 \n", + "23 1997 $20.61 $25.55 \n", + "24 1996 $22.12 $19.83 \n", + "25 1995 $18.43 $17.45 \n", + "26 1994 $17.20 $14.52 \n", + "27 1993 $18.43 $19.03 \n", + "28 1992 $20.58 $19.43 \n", + "29 1991 $21.54 $26.53 \n", + "30 1990 $24.53 $22.88 \n", + "31 1989 $19.64 $17.38 \n", + "32 1988 $15.97 $17.77 \n", + "33 1987 $19.20 $18.13 \n", + "\n", + " \n", + " Year High Year Low Year Close Annual% Change \n", + "0 $63.27 $49.57 $53.38 -12.58% \n", + "1 $66.30 $46.54 $61.06 34.46% \n", + "2 $76.41 $42.53 $45.41 -24.84% \n", + "3 $60.42 $42.53 $60.42 12.47% \n", + "4 $54.06 $26.21 $53.72 45.03% \n", + "5 $61.43 $34.73 $37.04 -30.70% \n", + "6 $107.95 $53.45 $53.45 -45.55% \n", + "7 $110.62 $86.65 $98.17 6.90% \n", + "8 $109.39 $77.72 $91.83 -7.08% \n", + "9 $113.39 $75.40 $98.83 8.15% \n", + "10 $91.48 $64.78 $91.38 15.10% \n", + "11 $81.03 $34.03 $79.39 78.00% \n", + "12 $145.31 $30.28 $44.60 -53.52% \n", + "13 $99.16 $50.51 $95.95 57.68% \n", + "14 $77.05 $55.90 $60.85 -0.34% \n", + "15 $69.91 $42.16 $61.06 40.82% \n", + "16 $56.37 $32.49 $43.36 33.37% \n", + "17 $37.96 $25.25 $32.51 4.17% \n", + "18 $32.68 $18.02 $31.21 56.36% \n", + "19 $32.21 $17.50 $19.96 -25.30% \n", + "20 $37.22 $23.91 $26.72 3.73% \n", + "21 $28.03 $11.38 $25.76 112.19% \n", + "22 $17.93 $10.82 $12.14 -31.22% \n", + "23 $26.55 $17.60 $17.65 -31.85% \n", + "24 $26.55 $17.33 $25.90 32.55% \n", + "25 $20.53 $16.86 $19.54 9.96% \n", + "26 $20.72 $13.89 $17.77 25.23% \n", + "27 $21.05 $13.98 $14.19 -27.19% \n", + "28 $23.03 $17.89 $19.49 1.78% \n", + "29 $32.25 $17.43 $19.15 -32.76% \n", + "30 $41.07 $15.43 $28.48 30.40% \n", + "31 $24.62 $16.99 $21.84 27.57% \n", + "32 $18.54 $12.58 $17.12 2.27% \n", + "33 $22.44 $15.12 $16.74 -6.64% " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "pan = pd.DataFrame(df[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [], + "source": [ + "d={'Year':pan.iloc[:,0],'AverageClosing Price':pan.iloc[:,1],'Year Open':pan.iloc[:,2],'Year High':pan.iloc[:,3],'Year Low':pan.iloc[:,4],'Year Close':pan.iloc[:,5],'Annual% Change':pan.iloc[:,6]}" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(data=d)" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearAverageClosing PriceYear OpenYear HighYear LowYear CloseAnnual% Change
02020$55.05$61.18$63.27$49.57$53.38-12.58%
12019$57.05$46.54$66.30$46.54$61.0634.46%
22018$64.90$60.37$76.41$42.53$45.41-24.84%
32017$50.84$52.33$60.42$42.53$60.4212.47%
42016$43.58$36.76$54.06$26.21$53.7245.03%
\n", + "
" + ], + "text/plain": [ + " Year AverageClosing Price Year Open Year High Year Low Year Close \\\n", + "0 2020 $55.05 $61.18 $63.27 $49.57 $53.38 \n", + "1 2019 $57.05 $46.54 $66.30 $46.54 $61.06 \n", + "2 2018 $64.90 $60.37 $76.41 $42.53 $45.41 \n", + "3 2017 $50.84 $52.33 $60.42 $42.53 $60.42 \n", + "4 2016 $43.58 $36.76 $54.06 $26.21 $53.72 \n", + "\n", + " Annual% Change \n", + "0 -12.58% \n", + "1 34.46% \n", + "2 -24.84% \n", + "3 12.47% \n", + "4 45.03% " + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv('oil_price.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'Key length (7) exceeds index depth (2)'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpan\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"1.csv\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2924\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_single_key\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2925\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2926\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2927\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2928\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_getitem_multilevel\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2970\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2971\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2972\u001b[0;31m \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2973\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mslice\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSeries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2974\u001b[0m \u001b[0mnew_columns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/multi.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method)\u001b[0m\n\u001b[1;32m 2401\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mkeylen\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2402\u001b[0m raise KeyError('Key length ({0}) exceeds index depth ({1})'\n\u001b[0;32m-> 2403\u001b[0;31m ''.format(keylen, self.nlevels))\n\u001b[0m\u001b[1;32m 2404\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2405\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkeylen\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_unique\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'Key length (7) exceeds index depth (2)'" + ] + } + ], + "source": [ + "pan[1,2,3,4,5,6,7].to_csv(\"1.csv\",index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "soup = BeautifulSoup(res.text, 'lxml')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Crude Oil Prices - Historical Annual Data
YearAverage
Closing Price
Year OpenYear HighYear LowYear CloseAnnual
% Change
2020$55.05$61.18$63.27$49.57$53.38-12.58%
2019$57.05$46.54$66.30$46.54$61.0634.46%
2018$64.90$60.37$76.41$42.53$45.41-24.84%
2017$50.84$52.33$60.42$42.53$60.4212.47%
2016$43.58$36.76$54.06$26.21$53.7245.03%
2015$48.72$52.72$61.43$34.73$37.04-30.70%
2014$93.17$95.14$107.95$53.45$53.45-45.55%
2013$97.98$93.14$110.62$86.65$98.176.90%
2012$94.05$102.96$109.39$77.72$91.83-7.08%
2011$94.88$91.59$113.39$75.40$98.838.15%
2010$79.48$81.52$91.48$64.78$91.3815.10%
2009$61.95$46.17$81.03$34.03$79.3978.00%
2008$99.67$99.64$145.31$30.28$44.60-53.52%
2007$72.34$60.77$99.16$50.51$95.9557.68%
2006$66.05$63.11$77.05$55.90$60.85-0.34%
2005$56.64$42.16$69.91$42.16$61.0640.82%
2004$41.51$33.71$56.37$32.49$43.3633.37%
2003$31.08$31.97$37.96$25.25$32.514.17%
2002$26.19$21.13$32.68$18.02$31.2156.36%
2001$25.98$27.29$32.21$17.50$19.96-25.30%
2000$30.38$25.56$37.22$23.91$26.723.73%
1999$19.35$12.42$28.03$11.38$25.76112.19%
1998$14.42$17.41$17.93$10.82$12.14-31.22%
1997$20.61$25.55$26.55$17.60$17.65-31.85%
1996$22.12$19.83$26.55$17.33$25.9032.55%
1995$18.43$17.45$20.53$16.86$19.549.96%
1994$17.20$14.52$20.72$13.89$17.7725.23%
1993$18.43$19.03$21.05$13.98$14.19-27.19%
1992$20.58$19.43$23.03$17.89$19.491.78%
1991$21.54$26.53$32.25$17.43$19.15-32.76%
1990$24.53$22.88$41.07$15.43$28.4830.40%
1989$19.64$17.38$24.62$16.99$21.8427.57%
1988$15.97$17.77$18.54$12.58$17.122.27%
1987$19.20$18.13$22.44$15.12$16.74-6.64%
, \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Link PreviewHTML Code (Click to Copy)
WTI Crude Oil Prices - 10 Year Daily Chart
Macrotrends
Source
, \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Link PreviewHTML Code (Click to Copy)
WTI Crude Oil Prices - 10 Year Daily Chart
Macrotrends
Source
]\n" + ] + } + ], + "source": [ + "table = soup.select('table')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Crude Oil Prices - Historical Annual Data
YearAverage
Closing Price
Year OpenYear HighYear LowYear CloseAnnual
% Change
2020$55.05$61.18$63.27$49.57$53.38-12.58%
2019$57.05$46.54$66.30$46.54$61.0634.46%
2018$64.90$60.37$76.41$42.53$45.41-24.84%
2017$50.84$52.33$60.42$42.53$60.4212.47%
2016$43.58$36.76$54.06$26.21$53.7245.03%
2015$48.72$52.72$61.43$34.73$37.04-30.70%
2014$93.17$95.14$107.95$53.45$53.45-45.55%
2013$97.98$93.14$110.62$86.65$98.176.90%
2012$94.05$102.96$109.39$77.72$91.83-7.08%
2011$94.88$91.59$113.39$75.40$98.838.15%
2010$79.48$81.52$91.48$64.78$91.3815.10%
2009$61.95$46.17$81.03$34.03$79.3978.00%
2008$99.67$99.64$145.31$30.28$44.60-53.52%
2007$72.34$60.77$99.16$50.51$95.9557.68%
2006$66.05$63.11$77.05$55.90$60.85-0.34%
2005$56.64$42.16$69.91$42.16$61.0640.82%
2004$41.51$33.71$56.37$32.49$43.3633.37%
2003$31.08$31.97$37.96$25.25$32.514.17%
2002$26.19$21.13$32.68$18.02$31.2156.36%
2001$25.98$27.29$32.21$17.50$19.96-25.30%
2000$30.38$25.56$37.22$23.91$26.723.73%
1999$19.35$12.42$28.03$11.38$25.76112.19%
1998$14.42$17.41$17.93$10.82$12.14-31.22%
1997$20.61$25.55$26.55$17.60$17.65-31.85%
1996$22.12$19.83$26.55$17.33$25.9032.55%
1995$18.43$17.45$20.53$16.86$19.549.96%
1994$17.20$14.52$20.72$13.89$17.7725.23%
1993$18.43$19.03$21.05$13.98$14.19-27.19%
1992$20.58$19.43$23.03$17.89$19.491.78%
1991$21.54$26.53$32.25$17.43$19.15-32.76%
1990$24.53$22.88$41.07$15.43$28.4830.40%
1989$19.64$17.38$24.62$16.99$21.8427.57%
1988$15.97$17.77$18.54$12.58$17.122.27%
1987$19.20$18.13$22.44$15.12$16.74-6.64%
\n" + ] + } + ], + "source": [ + "print(table[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Crude Oil Prices - Historical Annual Data
YearAverage
Closing Price
Year OpenYear HighYear LowYear CloseAnnual
% Change
2020$55.05$61.18$63.27$49.57$53.38-12.58%
2019$57.05$46.54$66.30$46.54$61.0634.46%
2018$64.90$60.37$76.41$42.53$45.41-24.84%
2017$50.84$52.33$60.42$42.53$60.4212.47%
2016$43.58$36.76$54.06$26.21$53.7245.03%
2015$48.72$52.72$61.43$34.73$37.04-30.70%
2014$93.17$95.14$107.95$53.45$53.45-45.55%
2013$97.98$93.14$110.62$86.65$98.176.90%
2012$94.05$102.96$109.39$77.72$91.83-7.08%
2011$94.88$91.59$113.39$75.40$98.838.15%
2010$79.48$81.52$91.48$64.78$91.3815.10%
2009$61.95$46.17$81.03$34.03$79.3978.00%
2008$99.67$99.64$145.31$30.28$44.60-53.52%
2007$72.34$60.77$99.16$50.51$95.9557.68%
2006$66.05$63.11$77.05$55.90$60.85-0.34%
2005$56.64$42.16$69.91$42.16$61.0640.82%
2004$41.51$33.71$56.37$32.49$43.3633.37%
2003$31.08$31.97$37.96$25.25$32.514.17%
2002$26.19$21.13$32.68$18.02$31.2156.36%
2001$25.98$27.29$32.21$17.50$19.96-25.30%
2000$30.38$25.56$37.22$23.91$26.723.73%
1999$19.35$12.42$28.03$11.38$25.76112.19%
1998$14.42$17.41$17.93$10.82$12.14-31.22%
1997$20.61$25.55$26.55$17.60$17.65-31.85%
1996$22.12$19.83$26.55$17.33$25.9032.55%
1995$18.43$17.45$20.53$16.86$19.549.96%
1994$17.20$14.52$20.72$13.89$17.7725.23%
1993$18.43$19.03$21.05$13.98$14.19-27.19%
1992$20.58$19.43$23.03$17.89$19.491.78%
1991$21.54$26.53$32.25$17.43$19.15-32.76%
1990$24.53$22.88$41.07$15.43$28.4830.40%
1989$19.64$17.38$24.62$16.99$21.8427.57%
1988$15.97$17.77$18.54$12.58$17.122.27%
1987$19.20$18.13$22.44$15.12$16.74-6.64%
, \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Link PreviewHTML Code (Click to Copy)
WTI Crude Oil Prices - 10 Year Daily Chart
Macrotrends
Source
, \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Link PreviewHTML Code (Click to Copy)
WTI Crude Oil Prices - 10 Year Daily Chart
Macrotrends
Source
]\n" + ] + } + ], + "source": [ + "table = soup.select('table')\n", + "print(table)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/crawling_assignment/Python crawling test.ipynb b/crawling_assignment/Python crawling test.ipynb new file mode 100644 index 0000000..2714db7 --- /dev/null +++ b/crawling_assignment/Python crawling test.ipynb @@ -0,0 +1,374 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://tva1.sinaimg.cn/large/00831rSTgy1gck2sgn2d3j3126036tc2.jpg)\n", + "\n", + "在wikipedia中打开https://en.wikipedia.org/wiki/Main_Page文件,并提取所有的头文件\n", + "\n", + "使用find_all函数查找所有头文件tag并暂时,使用循环将结果打印出来" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Main Page\n", + "From today's featured article\n", + "Did you know ...\n", + "In the news\n", + "On this day\n", + "From today's featured list\n", + "Today's featured picture\n", + "Other areas of Wikipedia\n", + "Wikipedia's sister projects\n", + "Wikipedia languages\n", + "Navigation menu\n", + "Personal tools\n", + "Namespaces\n", + "Variants\n", + "Views\n", + "More\n", + "Search\n", + "Navigation\n", + "Interaction\n", + "Tools\n", + "In other projects\n", + "Print/export\n", + "Languages\n" + ] + } + ], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "\n", + "url = \"https://en.wikipedia.org/wiki/Main_Page\"\n", + "\n", + "resp = requests.get(url)\n", + "\n", + "soup = BeautifulSoup(resp.text,\"lxml\")\n", + "\n", + "soup = soup.find_all([\"h1\",\"h2\",\"h3\",\"h4\",\"h5\",\"h6\",\"h7\"])\n", + "\n", + "for item in soup:\n", + " print(item.get_text().strip())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://tva1.sinaimg.cn/large/00831rSTgy1gck3i4ddhwj313203sn0t.jpg)\n", + "\n", + "找到Python文本中的a标签\n", + "再提取它的属性即可" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#mw-head\n", + "#p-search\n", + "https://en.wiktionary.org/wiki/Python\n", + "https://en.wiktionary.org/wiki/python\n", + "#Snakes\n", + "#Ancient_Greece\n", + "#Media_and_entertainment\n", + "#Computing\n", + "#Engineering\n", + "#Roller_coasters\n", + "#Vehicles\n", + "#Weaponry\n", + "#People\n", + "#Other_uses\n", + "#See_also\n", + "/w/index.php?title=Python&action=edit§ion=1\n", + "/wiki/Pythonidae\n", + "/wiki/Python_(genus)\n", + "/w/index.php?title=Python&action=edit§ion=2\n", + "/wiki/Python_(mythology)\n", + "/wiki/Python_of_Aenus\n", + "/wiki/Python_(painter)\n", + "/wiki/Python_of_Byzantium\n", + "/wiki/Python_of_Catana\n", + "/w/index.php?title=Python&action=edit§ion=3\n", + "/wiki/Python_(film)\n", + "/wiki/Pythons_2\n", + "/wiki/Monty_Python\n", + "/wiki/Python_(Monty)_Pictures\n", + "/w/index.php?title=Python&action=edit§ion=4\n", + "/wiki/Python_(programming_language)\n", + "/wiki/CPython\n", + "/wiki/CMU_Common_Lisp\n", + "/wiki/PERQ#PERQ_3\n", + "/w/index.php?title=Python&action=edit§ion=5\n", + "/w/index.php?title=Python&action=edit§ion=6\n", + "/wiki/Python_(Busch_Gardens_Tampa_Bay)\n", + "/wiki/Python_(Coney_Island,_Cincinnati,_Ohio)\n", + "/wiki/Python_(Efteling)\n", + "/w/index.php?title=Python&action=edit§ion=7\n", + "/wiki/Python_(automobile_maker)\n", + "/wiki/Python_(Ford_prototype)\n", + "/w/index.php?title=Python&action=edit§ion=8\n", + "/wiki/Colt_Python\n", + "/wiki/Python_(missile)\n", + "/wiki/Python_(nuclear_primary)\n", + "/w/index.php?title=Python&action=edit§ion=9\n", + "/wiki/Python_Anghelo\n", + "/w/index.php?title=Python&action=edit§ion=10\n", + "/wiki/PYTHON\n", + "/w/index.php?title=Python&action=edit§ion=11\n", + "/wiki/Cython\n", + "/wiki/Pyton\n", + "/wiki/File:Disambig_gray.svg\n", + "/wiki/Help:Disambiguation\n", + "https://en.wikipedia.org/w/index.php?title=Special:WhatLinksHere/Python&namespace=0\n", + "https://en.wikipedia.org/w/index.php?title=Python&oldid=943216744\n", + "/wiki/Help:Category\n", + "/wiki/Category:Disambiguation_pages\n", + "/wiki/Category:Disambiguation_pages_with_short_description\n", + "/wiki/Category:All_article_disambiguation_pages\n", + "/wiki/Category:All_disambiguation_pages\n", + "/wiki/Category:Animal_common_name_disambiguation_pages\n", + "/wiki/Special:MyTalk\n", + "/wiki/Special:MyContributions\n", + "/w/index.php?title=Special:CreateAccount&returnto=Python\n", + "/w/index.php?title=Special:UserLogin&returnto=Python\n", + "/wiki/Python\n", + "/wiki/Talk:Python\n", + "/wiki/Python\n", + "/w/index.php?title=Python&action=edit\n", + "/w/index.php?title=Python&action=history\n", + "/wiki/Main_Page\n", + "/wiki/Main_Page\n", + "/wiki/Wikipedia:Contents\n", + "/wiki/Wikipedia:Featured_content\n", + "/wiki/Portal:Current_events\n", + "/wiki/Special:Random\n", + "https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en\n", + "//shop.wikimedia.org\n", + "/wiki/Help:Contents\n", + "/wiki/Wikipedia:About\n", + "/wiki/Wikipedia:Community_portal\n", + "/wiki/Special:RecentChanges\n", + "//en.wikipedia.org/wiki/Wikipedia:Contact_us\n", + "/wiki/Special:WhatLinksHere/Python\n", + "/wiki/Special:RecentChangesLinked/Python\n", + "/wiki/Wikipedia:File_Upload_Wizard\n", + "/wiki/Special:SpecialPages\n", + "/w/index.php?title=Python&oldid=943216744\n", + "/w/index.php?title=Python&action=info\n", + "https://www.wikidata.org/wiki/Special:EntityPage/Q747452\n", + "/w/index.php?title=Special:CiteThisPage&page=Python&id=943216744\n", + "https://commons.wikimedia.org/wiki/Category:Python\n", + "/w/index.php?title=Special:Book&bookcmd=book_creator&referer=Python\n", + "/w/index.php?title=Special:ElectronPdf&page=Python&action=show-download-screen\n", + "/w/index.php?title=Python&printable=yes\n", + "https://af.wikipedia.org/wiki/Python\n", + "https://als.wikipedia.org/wiki/Python\n", + "https://ar.wikipedia.org/wiki/%D8%A8%D8%A7%D9%8A%D8%AB%D9%88%D9%86\n", + "https://az.wikipedia.org/wiki/Python\n", + "https://bn.wikipedia.org/wiki/%E0%A6%AA%E0%A6%BE%E0%A6%87%E0%A6%A5%E0%A6%A8_(%E0%A6%A6%E0%A7%8D%E0%A6%AC%E0%A7%8D%E0%A6%AF%E0%A6%B0%E0%A7%8D%E0%A6%A5%E0%A6%A4%E0%A6%BE_%E0%A6%A8%E0%A6%BF%E0%A6%B0%E0%A6%B8%E0%A6%A8)\n", + "https://be.wikipedia.org/wiki/Python\n", + "https://bg.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%BF%D0%BE%D1%8F%D1%81%D0%BD%D0%B5%D0%BD%D0%B8%D0%B5)\n", + "https://cs.wikipedia.org/wiki/Python_(rozcestn%C3%ADk)\n", + "https://da.wikipedia.org/wiki/Python\n", + "https://de.wikipedia.org/wiki/Python\n", + "https://eo.wikipedia.org/wiki/Pitono_(apartigilo)\n", + "https://eu.wikipedia.org/wiki/Python_(argipena)\n", + "https://fa.wikipedia.org/wiki/%D9%BE%D8%A7%DB%8C%D8%AA%D9%88%D9%86\n", + "https://fr.wikipedia.org/wiki/Python\n", + "https://ko.wikipedia.org/wiki/%ED%8C%8C%EC%9D%B4%EC%84%A0\n", + "https://hr.wikipedia.org/wiki/Python_(razdvojba)\n", + "https://io.wikipedia.org/wiki/Pitono\n", + "https://id.wikipedia.org/wiki/Python\n", + "https://ia.wikipedia.org/wiki/Python_(disambiguation)\n", + "https://is.wikipedia.org/wiki/Python_(a%C3%B0greining)\n", + "https://it.wikipedia.org/wiki/Python_(disambigua)\n", + "https://he.wikipedia.org/wiki/%D7%A4%D7%99%D7%AA%D7%95%D7%9F\n", + "https://ka.wikipedia.org/wiki/%E1%83%9E%E1%83%98%E1%83%97%E1%83%9D%E1%83%9C%E1%83%98_(%E1%83%9B%E1%83%A0%E1%83%90%E1%83%95%E1%83%90%E1%83%9A%E1%83%9B%E1%83%9C%E1%83%98%E1%83%A8%E1%83%95%E1%83%9C%E1%83%94%E1%83%9A%E1%83%9D%E1%83%95%E1%83%90%E1%83%9C%E1%83%98)\n", + "https://kg.wikipedia.org/wiki/Mboma_(nyoka)\n", + "https://la.wikipedia.org/wiki/Python_(discretiva)\n", + "https://lb.wikipedia.org/wiki/Python\n", + "https://hu.wikipedia.org/wiki/Python_(egy%C3%A9rtelm%C5%B1s%C3%ADt%C5%91_lap)\n", + "https://mr.wikipedia.org/wiki/%E0%A4%AA%E0%A4%BE%E0%A4%AF%E0%A4%A5%E0%A5%89%E0%A4%A8_(%E0%A4%86%E0%A4%9C%E0%A5%8D%E0%A4%9E%E0%A4%BE%E0%A4%B5%E0%A4%B2%E0%A5%80_%E0%A4%AD%E0%A4%BE%E0%A4%B7%E0%A4%BE)\n", + "https://nl.wikipedia.org/wiki/Python\n", + "https://ja.wikipedia.org/wiki/%E3%83%91%E3%82%A4%E3%82%BD%E3%83%B3\n", + "https://no.wikipedia.org/wiki/Pyton\n", + "https://pl.wikipedia.org/wiki/Pyton\n", + "https://pt.wikipedia.org/wiki/Python_(desambigua%C3%A7%C3%A3o)\n", + "https://ru.wikipedia.org/wiki/Python_(%D0%B7%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D1%8F)\n", + "https://sk.wikipedia.org/wiki/Python\n", + "https://sr.wikipedia.org/wiki/%D0%9F%D0%B8%D1%82%D0%BE%D0%BD_(%D0%B2%D0%B8%D1%88%D0%B5%D0%B7%D0%BD%D0%B0%D1%87%D0%BD%D0%B0_%D0%BE%D0%B4%D1%80%D0%B5%D0%B4%D0%BD%D0%B8%D1%86%D0%B0)\n", + "https://sh.wikipedia.org/wiki/Python\n", + "https://fi.wikipedia.org/wiki/Python\n", + "https://sv.wikipedia.org/wiki/Pyton\n", + "https://th.wikipedia.org/wiki/%E0%B9%84%E0%B8%9E%E0%B8%97%E0%B8%AD%E0%B8%99\n", + "https://tr.wikipedia.org/wiki/Python\n", + "https://uk.wikipedia.org/wiki/%D0%9F%D1%96%D1%84%D0%BE%D0%BD\n", + "https://ur.wikipedia.org/wiki/%D9%BE%D8%A7%D8%A6%DB%8C%D8%AA%DA%BE%D9%88%D9%86\n", + "https://vi.wikipedia.org/wiki/Python\n", + "https://zh.wikipedia.org/wiki/Python_(%E6%B6%88%E6%AD%A7%E4%B9%89)\n", + "https://www.wikidata.org/wiki/Special:EntityPage/Q747452#sitelinks-wikipedia\n", + "//en.wikipedia.org/wiki/Wikipedia:Text_of_Creative_Commons_Attribution-ShareAlike_3.0_Unported_License\n", + "//creativecommons.org/licenses/by-sa/3.0/\n", + "//foundation.wikimedia.org/wiki/Terms_of_Use\n", + "//foundation.wikimedia.org/wiki/Privacy_policy\n", + "//www.wikimediafoundation.org/\n", + "https://foundation.wikimedia.org/wiki/Privacy_policy\n", + "/wiki/Wikipedia:About\n", + "/wiki/Wikipedia:General_disclaimer\n", + "//en.wikipedia.org/wiki/Wikipedia:Contact_us\n", + "https://www.mediawiki.org/wiki/Special:MyLanguage/How_to_contribute\n", + "https://stats.wikimedia.org/#/en.wikipedia.org\n", + "https://foundation.wikimedia.org/wiki/Cookie_statement\n", + "//en.m.wikipedia.org/w/index.php?title=Python&mobileaction=toggle_view_mobile\n", + "https://wikimediafoundation.org/\n", + "https://www.mediawiki.org/\n" + ] + } + ], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "\n", + "url = \"https://en.wikipedia.org/wiki/Python\"\n", + "\n", + "\n", + "\n", + "rs = requests.get(url)\n", + "\n", + "\n", + "soup = BeautifulSoup(rs.text,'lxml')\n", + "\n", + "\n", + "for link in soup.find_all(\"a\"):\n", + " if 'href' in link.attrs: \n", + " print(link.attrs['href'])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://tva1.sinaimg.cn/large/00831rSTgy1gck6fh1s4aj312u03swgm.jpg)\n", + "Fint the number of twitter of DonaldTrump" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](https://tva1.sinaimg.cn/large/00831rSTgy1gck7q1ytipj314i03w0w4.jpg)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Enter the city:Beijing\n", + "\n", + "City name not found...\n" + ] + } + ], + "source": [ + "import requests\n", + "from pprint import pprint\n", + "\n", + "\n", + "def weather_data(query):\n", + " res = requests.get(\n", + " 'http://api.openweathermap.org/data/2.5/weather?' + query + '&APPID=****************************8&units=metric');\n", + " return res.json();\n", + "\n", + "\n", + "def print_weather(result, city):\n", + " print(\"{}'s temperature: {}°C \".format(city, result['main']['temp']))\n", + " print(\"Wind speed: {} m/s\".format(result['wind']['speed']))\n", + " print(\"Description: {}\".format(result['weather'][0]['description']))\n", + " print(\"Weather: {}\".format(result['weather'][0]['main']))\n", + "\n", + "\n", + "def main():\n", + " city = input('Enter the city:')\n", + " print()\n", + " try:\n", + " query = 'q=' + city;\n", + " w_data = weather_data(query);\n", + " print_weather(w_data, city)\n", + " print()\n", + " except:\n", + " print('City name not found...')\n", + "\n", + "\n", + "if __name__ == '__main__':\n", + " main()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import time\n", + "url = \"https://live.vhall.com/770936907?shareId=u-13115102-3&from=singlemessage\"\n", + "\n", + "for i in range(2000):\n", + " time.sleep(2)\n", + " resp = requests.get(url)\n", + " resp.text\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/crawling_program_lianjia/chromedriver b/crawling_program_lianjia/chromedriver new file mode 100755 index 0000000..1161497 Binary files /dev/null and b/crawling_program_lianjia/chromedriver differ diff --git a/crawling_program_lianjia/crawling.ipynb b/crawling_program_lianjia/crawling.ipynb new file mode 100644 index 0000000..d6536aa --- /dev/null +++ b/crawling_program_lianjia/crawling.ipynb @@ -0,0 +1,2212 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "html_doc = \"\"\"\n", + "The Dormouse's story\n", + "\n", + "

The Dormouse's story

\n", + "\n", + "

Once upon a time there were three little sisters; and their names were\n", + "Elsie,\n", + "Lacie and\n", + "Tillie;\n", + "and they lived at the bottom of a well.

\n", + "\n", + "

...

\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "from bs4 import BeautifulSoup\n", + "soup = BeautifulSoup(html_doc)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " \n", + " \n", + " The Dormouse's story\n", + " \n", + " \n", + " \n", + "

\n", + " \n", + " The Dormouse's story\n", + " \n", + "

\n", + "

\n", + " Once upon a time there were three little sisters; and their names were\n", + " \n", + " Elsie\n", + " \n", + " ,\n", + " \n", + " Lacie\n", + " \n", + " and\n", + " \n", + " Tillie\n", + " \n", + " ;\n", + "and they lived at the bottom of a well.\n", + "

\n", + "

\n", + " ...\n", + "

\n", + " \n", + "\n" + ] + } + ], + "source": [ + "print(soup.prettify())" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "The Dormouse's story" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 浏览器结构化数据方法\n", + "soup.title\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "

The Dormouse's story

" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soup.p" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Elsie" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soup.a" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Elsie,\n", + " Lacie,\n", + " Tillie]" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soup.find_all('a')" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Elsie" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soup.find(id='link1')" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"The Dormouse's story\"" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "soup.title.string" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "http://example.com/elsie\n", + "http://example.com/lacie\n", + "http://example.com/tillie\n" + ] + } + ], + "source": [ + "for link in soup.find_all('a'):\n", + " #print(link.get('href'))\n", + " #print(link.get('class'))\n", + " #print(link.get('id'))\n", + " print(link['href'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "从文档中获取所有文字" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The Dormouse's story\n", + "\n", + "The Dormouse's story\n", + "Once upon a time there were three little sisters; and their names were\n", + "Elsie,\n", + "Lacie and\n", + "Tillie;\n", + "and they lived at the bottom of a well.\n", + "...\n", + "\n" + ] + } + ], + "source": [ + "print(soup.get_text())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "对象的种类\n", + "Tag对象与xml或html原生文档中的tag相同" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "soup = BeautifulSoup('Extremely bold')\n", + "tag = soup.b" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Extremely bold" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tag" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "bs4.element.Tag" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(tag)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "tag对象的属性: `Name`和`Attributes`" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'b'" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tag.name" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['boldest']" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tag['class']" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'class': ['boldest']}" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tag.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "tag['class']='very'" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "tag['id']=2" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Extremely bold" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tag" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['body', 'strikeout']" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "css_soup = BeautifulSoup('

')\n", + "css_soup.p['class']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "all_href = soup.find_all('a')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 204, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "\n", + "\n", + "url = 'https://www.thestar.com.my/tech/tech-news/2020/03/02/get-in-gear-sweet-dreams-are-made-of-tech'\n", + "url_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'\n", + "headers = {'User-Agent':url_agent}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 205, + "metadata": {}, + "outputs": [], + "source": [ + "page = requests.get(url, headers=headers)" + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "metadata": {}, + "outputs": [], + "source": [ + "soup = BeautifulSoup(page.text,'html.parser')" + ] + }, + { + "cell_type": "code", + "execution_count": 210, + "metadata": {}, + "outputs": [], + "source": [ + "content = soup.find_all('div',{'class':'row content-holder story-wrapper'})" + ] + }, + { + "cell_type": "code", + "execution_count": 213, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[
\n", + "
    \n", + "
  • \n", + " Get In Gear\n", + " \n", + "
  • \n", + "
  • \n", + "

    \n", + " Monday, 02 Mar 2020\n", + "

    \n", + "

    By Angelin Yeoh

    \n", + "
  • \n", + "
  • \n", + "
      \n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
      \n", + "
      \n", + "
      \n", + " \n", + "
      \n", + " \n", + "
      \n", + "
    \n", + "
  • \n", + " \n", + "
  • \n", + "
    \n", + " \n", + "
    \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "

Stay warm or cool with the Climate 360 smart bed. — Sleep Number

\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "

Instead of unplugging to unwind, companies are getting users to turn to tech to help them get better sleep.

Climate 360 Smart Bed

Temperature is important to sleeping soundly, according to US-based Sleep Number, which has developed a bed that helps a user fall asleep faster by warming their feet.

After that it will keep the user snoozing by balancing the temperature based on the person’s natural wake and sleep cycles.

The firmness of the mattress and head level on each side is adjustable, allowing you and your partner to customise the bed to each other’s liking.

And if your partner is snoring, you could turn on the Partner Snore feature on the companion app – it will raise your partner’s head to alleviate mild snoring.

The app can also be used to track sleep time and breathing levels.

The price and availability will be announced later this year.

Sleep Robot

The Sleep Robot from Somnox has sensors to detect a user’s breathing pattern, allowing it to automatically adapt to the user’s breathing rate.

\"TheThe Sleep Robot helps lower a user’s breathing rate to help the person fall asleep. — Somnox

The company claimed that by holding the Sleep Robot close to the chest, users will subconsciously mimic the robot’s breathing rhythm as it gradually lowers it. This will put the person’s body in relaxation mode and, ultimately, help him or her fall asleep.

If that doesn’t help, users can play lullabies, guided meditations, audiobooks or white noise.

The company claimed that the Sleep Robot’s design helps users maintain a natural position without deviating from their natural neck and shoulder alignment when hugging it. The Sleep Robot costs £599 (RM2,750) on the Somnox website.

Frortier Health Tracker Pillow

This pillow has an embedded device for tracking and monitoring sleep patterns.

The data can be viewed from the Sleepace app, which also offers a daily report, with recommendations for better sleep based on a user’s breathing rate, heart rate and body movement.

\"TheThe Frortier Health Tracker Pillow has an embedded device for tracking and monitoring sleeping patterns. — itsLiving

Sleepace can also be used to play relaxing sounds to help a user fall asleep or set up a smart alarm that wakes a person up with nature-inspired sounds.

The tracker is claimed to be medical grade and more sensitive than most wristbands.

The Frortier Health Tracker Pillow goes for RM688 on the itsLiving website.

Muse S

Muse S is an EEG (electroencephalogram) headband that is claimed to use advanced signal processing technology to help keep the mind calm.

\"MuseMuse S is claimed to provide real-time feedback on your brain activity. — Muse

If the signal processor interprets your mental activity as calm, it will play a gentle weather sound but if it senses your focus drifting away, you’ll hear stormy weather which is your cue to bring your attention back to your breath.

The accompanying app also offers a series of guided meditations such as Go-To-Sleep Journeys that are designed to help users fall asleep.

Muse S is available online for US$350 (RM1,450).

\n", + "
\n", + "
\n", + "
Article type: free
\n", + "
User access status:
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + "
\n", + "
\n", + " \n", + "

\n", + " \n", + "

\n", + "
\n", + " Get In Gear\n", + " Wearables\n", + " Technology\n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "    \n", + " \n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "

What do you think of this article?

\n", + "
\n", + "
\n", + " It is insightful\n", + "
\n", + "
\n", + " Not in my interest\n", + "
\n", + "
\n", + "

\n", + " 0%\n", + " readers found this article useful\n", + "

\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
]" + ] + }, + "execution_count": 213, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "content" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'read_html' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mread_html\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpage\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'read_html' is not defined" + ] + } + ], + "source": [ + "read_html(page.text)" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'re.Pattern' object has no attribute 'timeout'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mhtml\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0murlopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mre\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"https://www.zwdu.com/book/25435/.+.html\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'gb2312'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/anaconda3/lib/python3.7/urllib/request.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(url, data, timeout, cafile, capath, cadefault, context)\u001b[0m\n\u001b[1;32m 220\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[0mopener\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_opener\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mopener\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 223\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minstall_opener\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.7/urllib/request.py\u001b[0m in \u001b[0;36mopen\u001b[0;34m(self, fullurl, data, timeout)\u001b[0m\n\u001b[1;32m 514\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 515\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 516\u001b[0;31m \u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 517\u001b[0m \u001b[0mprotocol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 518\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 're.Pattern' object has no attribute 'timeout'" + ] + } + ], + "source": [ + "#正则表达式\n", + "from bs4 import BeautifulSoup\n", + "from urllib.request import urlopen\n", + "import re\n", + "\n", + "\n", + "html = urlopen(\"https://www.zwdu.com/book/25435/7774879.html\").read().decode('gb2312')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "soup = BeautifulSoup(html,'lxml')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "content = soup.find_all('div',{'id':'content'})" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(content)" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'“没想到,我聂天居然重生了!”房间之中,聂天身躯剧烈颤抖,眼神之中充斥着压抑的愤怒。八一中文\\xa0W≤W≈W=.≈8≠1≥Z≥W≈.≤C≥OM他的心中,惊涛骇浪,过往种种在脑海之中飞驰而过。聂天本是天界第一战神,晨昏神域大半疆域都是他一手打下。赫赫威名,震慑神域!为了封赏他的绝世战功,晨昏大帝将掌上明珠紫烟公主许配于他。洛紫烟,晨昏神域第一美女,风采绝世,倾国倾城。配上聂天这天界第一战神,堪称天造地设。但聂天怎么也想不到,洛紫烟竟会在洞房之夜对他出手。堂堂天界第一战神,竟死在未婚妻的手上,还死在了洞房之夜,真是天大的笑话!“她为何杀我?难道传言是真的?晨昏大帝将洛紫烟许配于我,本来就是一个阴谋,就是为了要杀我。”聂天眼神凌冽,心中惊涛骇浪。功高震主,历来都是臣子大忌。聂天声望在晨昏神域,远胜晨昏大帝,后者想杀他,亦在情理之中。“好一个晨昏大帝,好一个洛紫烟,你们父女好狠的心!我聂天为晨昏神域打下大片疆土,更视洛紫烟为毕生挚爱,没想到最后竟死在你们父女手上。”聂天双目赤红,全身颤抖。良久,聂天稍稍镇定,眼中闪现一抹精芒,突然狂笑一声:“也罢!既然上苍让我聂天重生一回,我聂天再不做别人的殿下之臣。”“这一世,我要创造我的世界!”“这一世,我要成为万古天帝!”“这一世,我要主宰天界神域!”豪言壮语,振聋聩,聂天整个人锋芒毕露,好似一把出鞘利剑!重生一次,聂天信心满满,但当他看到自己的这副身躯,却是苦笑一声,自嘲道:“现在的这副身体,实在弱了一些。”死在洛紫烟手中,聂天再次醒来,已是百年之后。他的灵魂重生在已经病死的少年身上。巧合的是,这个少年也叫聂天。此时的聂天,乃是墨阳城三大家族之一聂家的家主。但是他这个家主,在家族之中却连一个体面的下人都不如。就连他死在房间,都没人知道。究其原因,就是因为他是一个元脉尽毁的废人。三年前,聂天还是墨阳城第一天才,年仅十三岁,实力达到元脉九重,堪称妖孽。但是三年前的一天,聂天和父亲及多位族人进入裂云山脉,进行历练,却遭遇一群黑衣人的伏击,结果父亲和族人全部被杀,只有聂天一人拼死逃出,但却元脉尽毁,成了废人。父亲死后,他继任家主。但是在所有人眼中,他这个家主,屁都不是。元脉尽毁,聂天开始自暴自弃,自甘堕落,每天借酒消愁,流连风月之地。就在昨天,他被墨阳城三大家族之一巴家的大少爷巴子阳,打得重伤昏死。抬回聂府之后,今天早上就咽气了。这也就给了战神聂天附身的机会。“元脉尽毁吗?”聂天稍稍镇定,开始检查自己的新身体。“毒!”聂天内视元脉,惊愕现,他的元脉除了损伤严重之外,竟然还呈现污黑之色。“我是被毒死的!”聂天脑海之中出现一张面孔,聂家大执事,聂三通。在聂天受伤期间,只有聂三通看过他,给他服下了一枚“恢复伤势”的固元丹。“好一个聂三通,定是觊觎家主之位,谋害于我。”聂天马上明白了,双瞳之中浮现一抹森然寒光。“嗯?”聂天继续内视身体,脸色唰地一变,惊骇道:“星辰原石!居然跟着我一起重生了!”“家主,大事不好了!”就在这个时候,一道身影夺门而入,惊慌大叫。“阿牛,生什么事了?”聂天看着来人,淡淡问道。阿牛,聂天的仆从,也是整个聂家唯一一个把他当家主的人。“家主,巴,巴家的人来逼婚了!”喘着粗气,阿牛着急说道。“巴家!”聂天微微皱眉,想起自己就是被巴家大少爷巴子阳打伤,脸色顿时变得阴沉起来。巴家,和聂家一样,墨阳城三大家族之一。不过自从三年前聂天父亲死后,聂家的声望一天不如一天,到了今日,已经是大厦将倾。正因为这样,巴家大少爷巴子阳才敢把聂天这个巴家家主打得重伤昏死。“阿牛,你不要着急,逼婚到底是怎么回事?”聂天并不慌张,反倒玩味一笑。阿牛愣了一下,一脸古怪地看着聂天。这还是家主吗?怎么这么镇定?阿牛隐隐感觉聂天变了,和以前不一样了,却又说不出哪里不一样。“快说啊。”聂天见阿牛愣,催促一声。“哎!是!”阿牛反应过来,赶紧说道:“巴家的管家带着巴家大少年和三少爷来我们府上提亲了,而且是向最有天赋的九小姐提亲。”“九妹!”聂天脑海中浮现一张粉雕玉琢,乖巧可爱的脸蛋。聂家是大家族,人口多,同辈之间,直接按年龄排序。九妹,就是聂家年轻一代年龄第九的女孩。“九妹好像叫聂雨柔吧。”聂天记得,上次见九妹,还是在三年之前,那时的聂雨柔还是一个六岁的小姑娘。现在想来,也该有九岁了。“九岁?!”聂天惊叫一声。谁会向一个九岁的小女孩提亲?“巴家给谁提亲?”聂天脸色一沉,眼神闪过一抹狠辣。向一个九岁的小孩提亲,巴家的人简直丧心病狂。先是打伤聂天,然后又上门逼婚,巴家的人真是嚣张到姥姥家了。“巴家三少爷巴子星。”阿牛回答。“巴子星!”聂天脸色更加阴沉,沉声道:“如果我没记错,巴子星是个傻子吧。”“嗯。”阿牛看着聂天,咽了一下口水,重重点头。聂天确实没有记错,巴子星的确是一个傻子,而且还是他亲手打傻的。三年前的聂天,风头正劲,墨阳城武会之上,巴子星不服气,向他挑战,结果被打成了傻子。为此事,聂家和巴家差一点血拼。现在,巴家居然替巴子星向聂雨柔提亲,明显是欺负聂家势弱,想要报以前的耻辱。聂雨柔是聂家新一代天才,刚刚九岁,已经是元脉四重,天赋直追当年的聂天。若是聂雨柔嫁给了巴子星,聂家绝对会沦为墨阳城的笑柄,而且还将失去一位少年天才。“不行!绝对不能让这种事情生!”聂天一脸肃杀,低吼道:“带路,我要去议事大堂!”“在我的头上拉屎,还管我要纸。巴家,今天我要让你们把自己拉的屎,吃回去!”聂天心中,霸道怒吼。'" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "content[0].get_text().replace('\\xa0\\xa0\\xa0\\xa0','').strip()" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "http://www.baidu.com/s?wd=%E8%8E%AB%E7%83%A6python\n" + ] + } + ], + "source": [ + "import requests\n", + "import webbrowser\n", + "param = {'wd':'莫烦python'}\n", + "url = 'http://www.baidu.com/s'\n", + "r = requests.get(url,params=param)\n", + "print(r.url)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 173, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "webbrowser.open(r.url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello there, sdf sdf!\n" + ] + } + ], + "source": [ + "data = {'firstname':'sdf','lastname':'sdf'}\n", + "r = requests.post('http://pythonscraping.com/pages/files/processing.php',data=data)\n", + "print(r.text)" + ] + }, + { + "cell_type": "code", + "execution_count": 240, + "metadata": {}, + "outputs": [], + "source": [ + "import selenium\n", + "from selenium import webdriver\n", + "driver = webdriver.Chrome(executable_path='/Users/liuhongyang/Desktop/chromedriver')\n", + "\n", + "\n", + "driver.get(\"https://www.thestar.com.my/business/marketwatch/\")\n", + "driver.find_element_by_xpath(\"//button[@id='buttonsearch']/a/i\").click()\n", + "driver.find_element_by_id(\"qTextBox\").clear()\n", + "driver.find_element_by_id(\"qTextBox\").send_keys(\"oil price\")\n", + "driver.find_element_by_id(\"myform\").submit()\n", + "\n", + "html = driver.page_source\n", + "soup = BeautifulSoup(html,'html.parser')\n", + "content = soup.find_all('div',{'class':'container'})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 226, + "metadata": {}, + "outputs": [], + "source": [ + "soup = BeautifulSoup(html,'html.parser')" + ] + }, + { + "cell_type": "code", + "execution_count": 236, + "metadata": {}, + "outputs": [], + "source": [ + "content = soup.find_all('div',{'class':'container'})" + ] + }, + { + "cell_type": "code", + "execution_count": 237, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 237, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import selenium\n", + "from selenium import webdriver\n", + "driver.get(\"http://www.python.org\")" + ] + }, + { + "cell_type": "code", + "execution_count": 241, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a\n" + ] + } + ], + "source": [ + "print('a')" + ] + }, + { + "cell_type": "code", + "execution_count": 248, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from bs4 import BeautifulSoup\n", + "import requests\n", + "url='https://www.indexmundi.com/commodities/?commodity=crude-oil&months=60'\n", + "\n", + "r = requests.get(url)\n", + "\n", + "\n", + "\n", + "df = pd.read_html(r.text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 251, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MonthPriceChange
0Feb 201554.79-
1Mar 201552.83-3.58 %
2Apr 201557.548.92 %
3May 201562.518.63 %
4Jun 201561.31-1.92 %
5Jul 201554.34-11.37 %
6Aug 201545.69-15.92 %
7Sep 201546.281.29 %
8Oct 201546.961.47 %
9Nov 201543.11-8.20 %
10Dec 201536.57-15.17 %
11Jan 201629.78-18.57 %
12Feb 201631.034.20 %
13Mar 201637.3420.34 %
14Apr 201640.759.13 %
15May 201645.9412.74 %
16Jun 201647.693.81 %
17Jul 201644.13-7.46 %
18Aug 201644.881.70 %
19Sep 201645.040.36 %
20Oct 201649.299.44 %
21Nov 201645.26-8.18 %
22Dec 201652.6216.26 %
23Jan 201753.591.84 %
24Feb 201754.351.42 %
25Mar 201750.90-6.35 %
26Apr 201752.162.48 %
27May 201749.89-4.35 %
28Jun 201746.17-7.46 %
29Jul 201747.663.23 %
30Aug 201749.944.78 %
31Sep 201752.956.03 %
32Oct 201754.923.72 %
33Nov 201759.939.12 %
34Dec 201761.192.10 %
35Jan 201866.238.24 %
36Feb 201863.46-4.18 %
37Mar 201864.171.12 %
38Apr 201868.797.20 %
39May 201873.436.75 %
40Jun 201871.98-1.97 %
41Jul 201872.670.96 %
42Aug 201871.08-2.19 %
43Sep 201875.366.02 %
44Oct 201876.731.82 %
45Nov 201862.32-18.78 %
46Dec 201853.96-13.41 %
47Jan 201956.584.86 %
48Feb 201961.138.04 %
49Mar 201963.794.35 %
50Apr 201968.587.51 %
51May 201966.83-2.55 %
52Jun 201959.76-10.58 %
53Jul 201961.482.88 %
54Aug 201957.67-6.20 %
55Sep 201960.044.11 %
56Oct 201957.27-4.61 %
57Nov 201960.405.47 %
58Dec 201963.354.88 %
59Jan 202061.63-2.72 %
\n", + "
" + ], + "text/plain": [ + " Month Price Change\n", + "0 Feb 2015 54.79 -\n", + "1 Mar 2015 52.83 -3.58 %\n", + "2 Apr 2015 57.54 8.92 %\n", + "3 May 2015 62.51 8.63 %\n", + "4 Jun 2015 61.31 -1.92 %\n", + "5 Jul 2015 54.34 -11.37 %\n", + "6 Aug 2015 45.69 -15.92 %\n", + "7 Sep 2015 46.28 1.29 %\n", + "8 Oct 2015 46.96 1.47 %\n", + "9 Nov 2015 43.11 -8.20 %\n", + "10 Dec 2015 36.57 -15.17 %\n", + "11 Jan 2016 29.78 -18.57 %\n", + "12 Feb 2016 31.03 4.20 %\n", + "13 Mar 2016 37.34 20.34 %\n", + "14 Apr 2016 40.75 9.13 %\n", + "15 May 2016 45.94 12.74 %\n", + "16 Jun 2016 47.69 3.81 %\n", + "17 Jul 2016 44.13 -7.46 %\n", + "18 Aug 2016 44.88 1.70 %\n", + "19 Sep 2016 45.04 0.36 %\n", + "20 Oct 2016 49.29 9.44 %\n", + "21 Nov 2016 45.26 -8.18 %\n", + "22 Dec 2016 52.62 16.26 %\n", + "23 Jan 2017 53.59 1.84 %\n", + "24 Feb 2017 54.35 1.42 %\n", + "25 Mar 2017 50.90 -6.35 %\n", + "26 Apr 2017 52.16 2.48 %\n", + "27 May 2017 49.89 -4.35 %\n", + "28 Jun 2017 46.17 -7.46 %\n", + "29 Jul 2017 47.66 3.23 %\n", + "30 Aug 2017 49.94 4.78 %\n", + "31 Sep 2017 52.95 6.03 %\n", + "32 Oct 2017 54.92 3.72 %\n", + "33 Nov 2017 59.93 9.12 %\n", + "34 Dec 2017 61.19 2.10 %\n", + "35 Jan 2018 66.23 8.24 %\n", + "36 Feb 2018 63.46 -4.18 %\n", + "37 Mar 2018 64.17 1.12 %\n", + "38 Apr 2018 68.79 7.20 %\n", + "39 May 2018 73.43 6.75 %\n", + "40 Jun 2018 71.98 -1.97 %\n", + "41 Jul 2018 72.67 0.96 %\n", + "42 Aug 2018 71.08 -2.19 %\n", + "43 Sep 2018 75.36 6.02 %\n", + "44 Oct 2018 76.73 1.82 %\n", + "45 Nov 2018 62.32 -18.78 %\n", + "46 Dec 2018 53.96 -13.41 %\n", + "47 Jan 2019 56.58 4.86 %\n", + "48 Feb 2019 61.13 8.04 %\n", + "49 Mar 2019 63.79 4.35 %\n", + "50 Apr 2019 68.58 7.51 %\n", + "51 May 2019 66.83 -2.55 %\n", + "52 Jun 2019 59.76 -10.58 %\n", + "53 Jul 2019 61.48 2.88 %\n", + "54 Aug 2019 57.67 -6.20 %\n", + "55 Sep 2019 60.04 4.11 %\n", + "56 Oct 2019 57.27 -4.61 %\n", + "57 Nov 2019 60.40 5.47 %\n", + "58 Dec 2019 63.35 4.88 %\n", + "59 Jan 2020 61.63 -2.72 %" + ] + }, + "execution_count": 251, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 342, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from bs4 import BeautifulSoup\n", + "import requests\n", + "\n", + "url='https://bj.lianjia.com/ershoufang/'\n", + "\n", + "\n", + "r = requests.get(url)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 355, + "metadata": {}, + "outputs": [], + "source": [ + "soup = BeautifulSoup(r.text,'html.parser')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 366, + "metadata": {}, + "outputs": [], + "source": [ + "content = soup.find('ul',{'class':'sellListContent'})\n", + "content2 = soup.select(\"div>ul>li>div\")" + ] + }, + { + "cell_type": "code", + "execution_count": 367, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 367, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "content2" + ] + }, + { + "cell_type": "code", + "execution_count": 392, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------\n", + "\n", + "宋家庄家园 - 刘家窑 \n", + "*******\n", + "3室1厅 | 89.24平米 | 西北 | 简装 | 高楼层(共29层) | 2011年建 | 塔楼\n", + "*******\n", + "383人关注 / 2个月以前发布\n", + "*******\n", + "近地铁VR看装修房本满五年\n", + "*******\n", + "460万单价51547元/平米\n", + "*******\n", + "-------\n", + "\n", + "易构空间 - 大望路 \n", + "*******\n", + "2室1厅 | 86.88平米 | 南 | 精装 | 中楼层(共14层) | 2004年建 | 板楼\n", + "*******\n", + "95人关注 / 9天以前发布\n", + "*******\n", + "近地铁VR房源房本满五年\n", + "*******\n", + "658万单价75737元/平米\n", + "*******\n", + "-------\n", + "\n", + "电子城小区 - 酒仙桥 \n", + "*******\n", + "2室1厅 | 75.11平米 | 东南 | 简装 | 底层(共28层) | 2001年建 | 塔楼\n", + "*******\n", + "370人关注 / 7个月以前发布\n", + "*******\n", + "VR房源房本满五年\n", + "*******\n", + "385万单价51259元/平米\n", + "*******\n", + "-------\n", + "\n", + "CBD传奇 - 大望路 \n", + "*******\n", + "1室0厅 | 37.03平米 | 北 | 简装 | 低楼层(共20层) | 2007年建 | 板塔结合\n", + "*******\n", + "223人关注 / 1个月以前发布\n", + "*******\n", + "近地铁VR房源房本满五年随时看房\n", + "*******\n", + "261万单价70484元/平米\n", + "*******\n", + "-------\n", + "\n", + "首开知语城 - 望京 \n", + "*******\n", + "3室2厅 | 171.81平米 | 南 北 | 精装 | 13层 | 2008年建 | 板楼\n", + "*******\n", + "116人关注 / 2个月以前发布\n", + "*******\n", + "近地铁VR房源房本满五年\n", + "*******\n", + "1120万单价65189元/平米\n", + "*******\n", + "-------\n", + "\n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'NoneType' object has no attribute 'div'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mchild\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdiv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdiv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnext_siblings\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"*******\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'div'" + ] + } + ], + "source": [ + "for child in soup.find('ul',{'class':'sellListContent'}).children:\n", + " \n", + " print(\"-------\")\n", + " print(child.a.get_text())\n", + " \n", + " \n", + " \n", + " for i in child.div.div.next_siblings:\n", + " print(i.get_text())\n", + " print(\"*******\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 349, + "metadata": {}, + "outputs": [], + "source": [ + "html2='
2室1厅 | 86.88平米 | 南 | 精装 | 中楼层(共14层) | 2004年建 | 板楼
95人关注 / 9天以前发布
近地铁VR房源房本满五年
658
单价75737元/平米
'" + ] + }, + { + "cell_type": "code", + "execution_count": 351, + "metadata": {}, + "outputs": [], + "source": [ + "soup = BeautifulSoup(html2,'lxml')" + ] + }, + { + "cell_type": "code", + "execution_count": 354, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " \n", + "
\n", + "
\n", + " \n", + " 易构空间满五年唯一全南两居室,视野好,私密性好\n", + " \n", + " \n", + " \n", + " 必看好房\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " \n", + " 易构空间\n", + " \n", + " -\n", + " \n", + " 大望路\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " 2室1厅 | 86.88平米 | 南 | 精装 | 中楼层(共14层) | 2004年建 | 板楼\n", + "
\n", + "
\n", + "
\n", + " \n", + " \n", + " 95人关注 / 9天以前发布\n", + "
\n", + "
\n", + " \n", + " 近地铁\n", + " \n", + " \n", + " VR房源\n", + " \n", + " \n", + " 房本满五年\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + " 658\n", + " \n", + " 万\n", + "
\n", + "
\n", + " \n", + " 单价75737元/平米\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + "\n" + ] + } + ], + "source": [ + "print(soup.prettify())" + ] + }, + { + "cell_type": "code", + "execution_count": 444, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "宋家庄 小三居 高楼层 满五年唯一包含综合地价款\n", + "460\n", + "单价51547元/平米\n", + "3室1厅 | 89.24平米 | 西北 | 简装 | 高楼层(共29层) | 2011年建 | 塔楼\n", + "易构空间满五年唯一全南两居室,视野好,私密性好\n", + "658\n", + "单价75737元/平米\n", + "2室1厅 | 86.88平米 | 南 | 精装 | 中楼层(共14层) | 2004年建 | 板楼\n", + "(新)东南向2居 安静不临街 商品房\n", + "385\n", + "单价51259元/平米\n", + "2室1厅 | 75.11平米 | 东南 | 简装 | 底层(共28层) | 2001年建 | 塔楼\n", + "CBD传奇二期,聚焦房,北向带燃气,业主急售,有钥匙\n", + "261\n", + "单价70484元/平米\n", + "1室0厅 | 37.03平米 | 北 | 简装 | 低楼层(共20层) | 2007年建 | 板塔结合\n", + "望京知语城 精装修 满五唯一 正南北通透 拎包入住\n", + "1120\n", + "单价65189元/平米\n", + "3室2厅 | 171.81平米 | 南 北 | 精装 | 13层 | 2008年建 | 板楼\n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'NoneType' object has no attribute 'find'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 29\u001b[0;31m \u001b[0mcrawl_page\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 30\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0mdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_element_by_link_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"2\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclick\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mcrawl_page\u001b[0;34m()\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0minfo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minfos\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0mname\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minfo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'div'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'class'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'title'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 18\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0mtotalPrice\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minfo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'div'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'class'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'priceInfo'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'div'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'class'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'totalPrice'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'span'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'find'" + ] + } + ], + "source": [ + "import selenium\n", + "from selenium import webdriver\n", + "driver = webdriver.Chrome(executable_path='/Users/liuhongyang/Desktop/chromedriver')\n", + "import time\n", + "\n", + "driver.get(\"https://bj.lianjia.com/ershoufang/\")\n", + "\n", + "def crawl_page():\n", + " html = driver.page_source\n", + "\n", + " soup = BeautifulSoup(html,'html.parser')\n", + "\n", + " infos = soup.find('ul',{'class','sellListContent'}).find_all('li')\n", + "\n", + "\n", + " for info in infos:\n", + " name = info.find('div',{'class':'title'}).find('a').get_text()\n", + " print(name)\n", + " totalPrice = info.find('div',{'class':'priceInfo'}).find('div',{'class':'totalPrice'}).find('span').get_text()\n", + " print(totalPrice)\n", + " unitPrice = info.find('div',{'class':'priceInfo'}).find('div',{'class':'unitPrice'}).find('span').get_text()\n", + " print(unitPrice)\n", + " address = info.find('div',{'class','address'}).find('div',{'class','houseInfo'}).get_text()\n", + " print(address)\n", + " with open('lianjia.csv','a',encoding='utf-8') as f:\n", + " f.write(\"{},{},{},{}\\n\".format(name,totalPrice,unitPrice,address))\n", + "\n", + "\n", + "crawl_page()\n", + "time.sleep(3)\n", + "driver.find_element_by_link_text(\"2\").click()\n", + "crawl_page()\n", + "time.sleep(3)\n", + "driver.find_element_by_link_text(\"3\").click()\n", + "crawl_page()\n", + "driver.find_element_by_link_text(\"4\").click()\n", + "crawl_page()\n", + "driver.find_element_by_link_text(u\"下一页\").click()\n", + "crawl_page()\n", + "driver.find_element_by_link_text(u\"下一页\").click()\n", + "crawl_page()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 433, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "\n", + "url = 'https://bj.lianjia.com/ershoufang/pg3/'\n", + "\n", + "headers={\n", + " 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'\n", + "}\n", + "\n", + "\n", + "resp = requests.get(url,headers=headers)\n", + "\n", + "\n", + "#print(resp.content.decode('utf-8')) 网页内容 二进制\n", + "# print(resp.text) 网络内容 文本\n", + "\n", + "soup = BeautifulSoup(resp.text,'lxml')\n", + "\n", + "infos = soup.find('ul',{'class','sellListContent'}).find_all('li')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 438, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "不临街不把边 4层南北向三居室 一梯两户 接受换房\n", + "445\n", + "单价35660元/平米\n", + "3室1厅 | 124.79平米 | 南 北 | 毛坯 | 中楼层(共6层) | 2008年建 | 板楼\n", + "南北三居改大两居,精装修,诚心出售\n", + "868\n", + "单价70570元/平米\n", + "3室1厅 | 123平米 | 南 北 | 精装 | 中楼层(共26层) | 2008年建 | 板塔结合\n", + "炫彩嘉轩 正规一居室 视野开阔不临街\n", + "375\n", + "单价85364元/平米\n", + "1室1厅 | 43.93平米 | 西 | 精装 | 高楼层(共21层) | 2006年建 | 板塔结合\n", + "龙博苑小区南向正规一居室户型方正\n", + "332\n", + "单价55996元/平米\n", + "1室1厅 | 59.29平米 | 南 | 精装 | 低楼层(共6层) | 2004年建 | 板楼\n", + "本房是住宅 房子是七十年产权 业主诚意出售\n", + "445\n", + "单价37986元/平米\n", + "2室1厅 | 117.15平米 | 东 北 | 精装 | 20层 | 1997年建 | 塔楼\n", + "三环内 70年住宅 2居双卫 临近地铁8号线 交通便利\n", + "555\n", + "单价43428元/平米\n", + "2室1厅 | 127.8平米 | 西北 | 简装 | 低楼层(共27层) | 1999年建 | 塔楼\n", + "万科紫苑 南北通透全能小三居 精装修 保持好!\n", + "670\n", + "单价70364元/平米\n", + "3室2厅 | 95.22平米 | 南 北 | 精装 | 低楼层(共15层) | 2011年建 | 板楼\n", + "朝阳无限三居 满五唯一 带圆弧飘窗观景房 看房随时\n", + "660\n", + "单价44385元/平米\n", + "3室2厅 | 148.7平米 | 东南 北 | 简装 | 顶层(共21层) | 2004年建 | 板塔结合\n", + "满五年 南北双通透 诚心出售看房随时\n", + "620\n", + "单价43623元/平米\n", + "3室2厅 | 142.13平米 | 南 北 | 毛坯 | 低楼层(共11层) | 2003年建 | 板楼\n", + "天通苑东二区南北通透两居,商品房,户型方正\n", + "388\n", + "单价42873元/平米\n", + "2室1厅 | 90.5平米 | 南 北 | 简装 | 中楼层(共7层) | 2001年建 | 板楼\n", + "管理好 人文素质高 商品房 视野好\n", + "820\n", + "单价55775元/平米\n", + "2室2厅 | 147.02平米 | 西北 | 简装 | 中楼层(共18层) | 1998年建 | 塔楼\n", + "西什库什刹海北海北附近正规两居税少\n", + "825\n", + "单价158350元/平米\n", + "2室1厅 | 52.1平米 | 南 北 | 简装 | 高楼层(共7层) | 1992年建 | 板楼\n", + "精装修 中楼层 南北通透 满五唯一\n", + "133\n", + "单价18262元/平米\n", + "3室1厅 | 72.83平米 | 南 北 | 精装 | 中楼层(共4层) | 1986年建 | 板楼\n", + "西四环低密度德式花园下跃复式房源\n", + "1195\n", + "单价74852元/平米\n", + "3室2厅 | 159.65平米 | 南 北 | 精装 | 底层(共7层) | 2012年建 | 板楼\n", + "远洋一方,南向一居,中间楼层,满五唯一\n", + "360\n", + "单价55884元/平米\n", + "1室1厅 | 64.42平米 | 南 | 精装 | 低楼层(共15层) | 2010年建 | 板塔结合\n", + "万年花城 南北通透 前后带花园 板楼户型\n", + "515\n", + "单价68113元/平米\n", + "2室1厅 | 75.61平米 | 南 北 | 精装 | 底层(共8层) | 2006年建 | 板楼\n", + "郁花园一里 精装修 看房方便 落地飘窗\n", + "280\n", + "单价39955元/平米\n", + "1室1厅 | 70.08平米 | 东 西 | 精装 | 中楼层(共6层) | 2001年建 | 板楼\n", + "沸城南向两居室 看房方便 业主诚心出售\n", + "349\n", + "单价36983元/平米\n", + "2室2厅 | 94.37平米 | 西南 | 简装 | 底层(共25层) | 2007年建 | 板塔结合\n", + "四环内 地铁旁 低楼层 满五唯一\n", + "329\n", + "单价51367元/平米\n", + "2室1厅 | 64.05平米 | 南 北 | 简装 | 底层(共5层) | 2000年建 | 板楼\n", + "南北通透 把边户型 有钥匙 随时看\n", + "268\n", + "单价35488元/平米\n", + "2室1厅 | 75.52平米 | 南 西 北 | 简装 | 顶层(共6层) | 2000年建 | 板楼\n", + "南北向小两居 有钥匙看房方便 诚心出售\n", + "229\n", + "单价42345元/平米\n", + "2室1厅 | 54.08平米 | 南 北 | 简装 | 中楼层(共6层) | 1990年建 | 板楼\n", + "大峪南路一居室南北通透楼层适中,已满两年\n", + "143\n", + "单价36951元/平米\n", + "1室1厅 | 38.7平米 | 南 北 | 简装 | 中楼层(共6层) | 1991年建 | 板楼\n", + "汤泉墅南北通透正规两居室业主诚意出售\n", + "315\n", + "单价35051元/平米\n", + "3室1厅 | 89.87平米 | 南 北 | 简装 | 中楼层(共6层) | 2011年建 | 板楼\n", + "南北通透 带电梯 两居室 采光好\n", + "305\n", + "单价36358元/平米\n", + "2室2厅 | 83.89平米 | 南 北 | 简装 | 顶层(共18层) | 2012年建 | 板塔结合\n", + "满五年唯一 南北通透三居室 诚意出售 看房方便\n", + "484.5\n", + "单价38714元/平米\n", + "3室1厅 | 125.15平米 | 南 北 | 简装 | 顶层(共6层) | 2003年建 | 板楼\n", + "《555万》《两居室》《次顶层》\n", + "555\n", + "单价60936元/平米\n", + "2室1厅 | 91.08平米 | 南 北 | 精装 | 高楼层(共21层) | 2012年建 | 板楼\n", + "加州水郡带飘窗东向两居夏日看荷花\n", + "245\n", + "单价27640元/平米\n", + "2室1厅 | 88.64平米 | 东 | 精装 | 顶层(共14层) | 2011年建 | 塔楼\n", + "珠江御景南北通透,大三居,格局合理\n", + "415\n", + "单价31488元/平米\n", + "3室1厅 | 131.8平米 | 南 北 | 其他 | 高楼层(共11层) | 2007年建 | 暂无数据\n", + "精装修,小户型,总价低,无抵押\n", + "318\n", + "单价60967元/平米\n", + "1室0厅 | 52.16平米 | 北 | 简装 | 中楼层(共24层) | 2008年建 | 板塔结合\n", + "三环边 两居室 大阳台 2001年板楼 满五唯一 钢混结构\n", + "329\n", + "单价45474元/平米\n", + "2室1厅 | 72.35平米 | 西 | 精装 | 顶层(共6层) | 2001年建 | 板楼\n" + ] + } + ], + "source": [ + "for info in infos:\n", + " name = info.find('div',{'class':'title'}).find('a').get_text()\n", + " print(name)\n", + " totalPrice = info.find('div',{'class':'priceInfo'}).find('div',{'class':'totalPrice'}).find('span').get_text()\n", + " print(totalPrice)\n", + " unitPrice = info.find('div',{'class':'priceInfo'}).find('div',{'class':'unitPrice'}).find('span').get_text()\n", + " print(unitPrice)\n", + " address = info.find('div',{'class','address'}).find('div',{'class','houseInfo'}).get_text()\n", + " print(address)\n", + " with open('lianjia.csv','a',encoding='utf-8') as f:\n", + " f.write(\"{},{},{},{}\\n\".format(name,totalPrice,unitPrice,address))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import time\n", + "url = 'https://bj.lianjia.com/ershoufang/pg3/'\n", + "\n", + "page = 100\n", + "with open('lianjia.csv','a',encoding='utf-8') as f:\n", + " for i in range(page):\n", + " time.sleep(3)\n", + " url = 'https://bj.lianjia.com/ershoufang/pg'+str(i)\n", + "\n", + "\n", + " headers={\n", + " 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'\n", + " }\n", + "\n", + " resp = requests.get(url,headers=headers)\n", + "\n", + "\n", + " # print(resp.content.decode('utf-8')) 网页内容 二进制\n", + " # print(resp.text) 网络内容 文本\n", + "\n", + " soup = BeautifulSoup(resp.text,'lxml')\n", + "\n", + " infos = soup.find('ul',{'class','sellListContent'}).find_all('li')\n", + "\n", + "\n", + "\n", + " for info in infos:\n", + " name = info.find('div',{'class':'title'}).find('a').get_text()\n", + " #print(name)\n", + " \n", + " totalPrice = info.find('div',{'class':'priceInfo'}).find('div',{'class':'totalPrice'}).find('span').get_text()\n", + " #print(totalPrice)\n", + " unitPrice = info.find('div',{'class':'priceInfo'}).find('div',{'class':'unitPrice'}).find('span').get_text()\n", + " #print(unitPrice)\n", + " #address = info.find('div',{'class','address'}).find('div',{'class','houseInfo'}).get_text()\n", + " # print(address)\n", + "\n", + " list = address.split(\"|\")\n", + " \n", + " \n", + " f.write(\"{},{},{}\\n\".format(name,totalPrice,unitPrice))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/crawling_program_lianjia/scraping.py b/crawling_program_lianjia/scraping.py new file mode 100644 index 0000000..57b3ace --- /dev/null +++ b/crawling_program_lianjia/scraping.py @@ -0,0 +1,16 @@ +# coding:utf-8 +import requests +from bs4 import BeautifulSoup +url = "https://www.163.com/" +data = requests.get(url).text +soup = BeautifulSoup(data,'lxml') +news_titles = soup.select("div>ul>li>a") + +for n in news_titles: + title = n.get_text() + link = n.get('href') + data = { + '标题' : title, + '链接' : link + } + print(data)