{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## **Web Scraping** in **zone-h** \n",
    "```\n",
    "Code using python3\n",
    "for Jupyter Notebook\n",
    "\n",
    "Work in VS Code\n",
    "Not for google colab\n",
    "\n",
    "Link Web Scraping :\n",
    "http://zone-h.org\n",
    "\n",
    "Code By Natasya \n",
    "medium.com/@liontin\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 330,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import csv\n",
    "import os\n",
    "\n",
    "# Third-party. The aliases are used by every cell below: Natasya = BeautifulSoup,\n",
    "# liontin = pandas, zs = plotly.express.\n",
    "import requests\n",
    "from bs4 import BeautifulSoup as Natasya\n",
    "import pandas as liontin\n",
    "import plotly.express as zs\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 331,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scrape the zone-h.org archive listing for go.id domains, pages start..stop (inclusive),\n",
    "# into <savefile>.csv with the columns web, Attacker, waktu, Arsip.\n",
    "pagestart = input('Start page number')  # first archive page to fetch\n",
    "pagestop = input('Stop page number')  # last archive page to fetch (inclusive)\n",
    "savefile = input('File Name')  # name of the output file, without the .csv extension\n",
    "start = int(pagestart)\n",
    "stop = int(pagestop)\n",
    "\n",
    "# The cookies have to be adjusted to the ones zone-h.org hands out when the site is visited.\n",
    "# They are read from the environment, or prompted for, so that no session identifier is\n",
    "# saved inside the notebook.\n",
    "myCookie = {\n",
    "    \"PHPSESSID\": os.environ.get('ZONEH_PHPSESSID') or input('PHPSESSID cookie'),\n",
    "    \"ZHE\": os.environ.get('ZONEH_ZHE') or input('ZHE cookie'),\n",
    "}\n",
    "archive_url = \"https://www.zone-h.org/archive/filter=1/published=0/special=1/domain=go.id/fulltext=1/page=%s\"\n",
    "\n",
    "# Mode 'w' (instead of 'a') makes the cell re-runnable: appending wrote another header row and\n",
    "# duplicate records into the file on every run. newline='' is what the csv module documentation\n",
    "# asks for on files it writes to, and utf-8 matches the default encoding of read_csv below.\n",
    "with open(savefile+'.csv', 'w', newline='', encoding='utf-8') as csvfile, requests.Session() as req:\n",
    "    writecsv = csv.writer(csvfile)\n",
    "    writecsv.writerow([\"web\", \"Attacker\", \"waktu\", \"Arsip\"])\n",
    "    for page in range(start, stop+1):\n",
    "        # One Session serves all pages and is closed by the with-block; the timeout keeps a\n",
    "        # stalled request from hanging the cell.\n",
    "        data = req.get(archive_url % page, cookies=myCookie, timeout=30)\n",
    "        getdata = Natasya(data.content, 'html.parser')\n",
    "        table = getdata.find('table')\n",
    "        if table is None:\n",
    "            # Without this check the row lookup below fails with an opaque AttributeError.\n",
    "            print('Page %s: no table in the response (HTTP %s) - check the cookies. Stopping.' % (page, data.status_code))\n",
    "            break\n",
    "\n",
    "        for ZH in table.find_all('tr', class_=None)[1:]:  # [1:] drops the first row of the table\n",
    "            kolom = ZH.find_all('td')\n",
    "            if len(kolom) < 10:\n",
    "                continue  # cells 0, 1, 7 and 9 are read below, so a shorter row cannot be a record\n",
    "            link = kolom[9].find('a')\n",
    "            if link is None:\n",
    "                continue  # no link in cell 9, nothing to store in the Arsip column\n",
    "            halaman = kolom[7].text.strip()\n",
    "            attacker = kolom[1].text.strip()\n",
    "            kejadian = kolom[0].text.strip().replace('/', '-')\n",
    "            arsip = link.get('href')\n",
    "            writecsv.writerow([halaman, attacker, kejadian, 'www.zone-h.org%s' % arsip])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 332,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the CSV written by the scraping cell (same file name). pandas is imported as `liontin`.\n",
    "zone_H = liontin.read_csv(savefile+'.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 333,
   "metadata": {},
   "outputs": [],
   "source": [
    "# zoneH is the name the cells below use for the scraped records.\n",
    "zoneH = liontin.DataFrame(zone_H)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 334,
   "metadata": {},
   "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
webAttackerwaktuArsip
0satpammantap.binmas.metro.polr...K4PUYU4K2022-01-26www.zone-h.org/mirror/id/39331180
1polreskapuas.kalteng.polri.go....K4PUYU4K2022-01-26www.zone-h.org/mirror/id/39331179
2polresbatu.jatim.polri.go.id/s...K4PUYU4K2022-01-26www.zone-h.org/mirror/id/39331178
3kapuas.kalteng.polri.go.id/pol...K4PUYU4K2022-01-26www.zone-h.org/mirror/id/39331177
4dprd.sukabumikota.go.id/a.txtgalehdotid2022-01-26www.zone-h.org/mirror/id/39329599
...............
70disperta.madiunkota.go.id/b4.html0x19982022-01-13www.zone-h.org/mirror/id/39259138
71itk.dprdsulsel.go.idSABUNMANDI CYBER TEAM2022-01-13www.zone-h.org/mirror/id/39258780
72pupr.padangpariamankab.go.idXNUXER ACHILL2022-01-13www.zone-h.org/mirror/id/39258682
73bpbiabiyoso.kemsos.go.id/index...elv1n42022-01-13www.zone-h.org/mirror/id/39258521
74epipad.probolinggokab.go.id/in...MR.5T1Y02022-01-12www.zone-h.org/mirror/id/39257787
\n", "

75 rows × 4 columns

\n", "
" ], "text/plain": [ " web Attacker waktu \\\n", "0 satpammantap.binmas.metro.polr... K4PUYU4K 2022-01-26 \n", "1 polreskapuas.kalteng.polri.go.... K4PUYU4K 2022-01-26 \n", "2 polresbatu.jatim.polri.go.id/s... K4PUYU4K 2022-01-26 \n", "3 kapuas.kalteng.polri.go.id/pol... K4PUYU4K 2022-01-26 \n", "4 dprd.sukabumikota.go.id/a.txt galehdotid 2022-01-26 \n", ".. ... ... ... \n", "70 disperta.madiunkota.go.id/b4.html 0x1998 2022-01-13 \n", "71 itk.dprdsulsel.go.id SABUNMANDI CYBER TEAM 2022-01-13 \n", "72 pupr.padangpariamankab.go.id XNUXER ACHILL 2022-01-13 \n", "73 bpbiabiyoso.kemsos.go.id/index... elv1n4 2022-01-13 \n", "74 epipad.probolinggokab.go.id/in... MR.5T1Y0 2022-01-12 \n", "\n", " Arsip \n", "0 www.zone-h.org/mirror/id/39331180 \n", "1 www.zone-h.org/mirror/id/39331179 \n", "2 www.zone-h.org/mirror/id/39331178 \n", "3 www.zone-h.org/mirror/id/39331177 \n", "4 www.zone-h.org/mirror/id/39329599 \n", ".. ... \n", "70 www.zone-h.org/mirror/id/39259138 \n", "71 www.zone-h.org/mirror/id/39258780 \n", "72 www.zone-h.org/mirror/id/39258682 \n", "73 www.zone-h.org/mirror/id/39258521 \n", "74 www.zone-h.org/mirror/id/39257787 \n", "\n", "[75 rows x 4 columns]" ] }, "execution_count": 334, "metadata": {}, "output_type": "execute_result" } ], "source": [ "zoneH" ] }, { "cell_type": "code", "execution_count": 335, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "alignmentgroup": "True", "hovertemplate": "variable=Attacker
index=%{x}
value=%{y}", "legendgroup": "Attacker", "marker": { "color": "#636efa", "pattern": { "shape": "" } }, "name": "Attacker", "offsetgroup": "Attacker", "orientation": "v", "showlegend": true, "textposition": "auto", "type": "bar", "x": [ "AnonCoders Indonesia", "Zyyy", "Mr.spongebob", "MR.5T1Y0", "SABUNMANDI CYBER TEAM", "K4PUYU4K", "Mr.Spongebob", "0x1998", "Panoc Team", "/Rayzky_", "AnonCoders Czech Republic", "NULL SQU4D", "Ucup-Kun", "Mr.Rm19", "elv1n4", "RnpCyber", "XNUXER ACHILL", "Lolz@Junn-13.id", "hellow dunia", "galehdotid", "./G1L4N6_ST86", "LahBodoAmat" ], "xaxis": "x", "y": [ 23, 10, 6, 5, 4, 4, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ], "yaxis": "y" } ], "layout": { "barmode": "relative", "legend": { "title": { "text": "variable" }, "tracegroupgap": 0 }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, 
"#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": 
{ "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { 
"colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": 
"white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Data Kasus Peretasan website Di Pemerintahan Indonesia Berdasarkan Nickname" }, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "title": { "text": "index" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "value" } } } } }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = zs.bar(zoneH['Attacker'].value_counts(),\n", " title=\"Data Kasus Peretasan website Di Pemerintahan Indonesia Berdasarkan Nickname\")\n", "fig.show()" ] } ], "metadata": { "interpreter": { "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" }, "kernelspec": { "display_name": "Python 3.6.8 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }