{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "## **Web Scraping** the unofficial **HackerOne** disclosure timeline.\n", "```\n", "Link Web Scraping :\n", "http://h1.nobbd.de/\n", "\n", "Code By Natasya \n", "medium.com/@liontin\n", "```" ] }, { "cell_type": "code", "execution_count": 271, "metadata": {}, "outputs": [], "source": [ "import requests\n", "from bs4 import BeautifulSoup\n", "import pandas as pd\n", "import numpy as np\n", "import plotly.express as px" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = []\n", "w = []\n", "url = 'http://h1.nobbd.de/index.php?start='\n", "for page in np.arange(0, 11840, (20)): \n", " req = requests.get(url + str(page))\n", " soup = BeautifulSoup(req.text, 'html.parser')\n", " h1 = soup.find_all('div',attrs={'class','report-wrapper'})\n", " for hack in h1:\n", " h2 = hack.find_all(\"div\",attrs={\"class\",\"report\"})\n", " for i in h2:\n", " layanan = i.find_all('b')[0].text.strip()\n", " try : report = i.find(\"a\", {\"class\": \"title\"})['title']\n", " except : report = ''\n", " bug_hunter = i.find_all('a')[1].text.strip()\n", " mirror = i.find(\"a\", {\"class\": \"title\"})['href']\n", " data.append({'Company': layanan , 'Title': report, 'Submit': bug_hunter, 'link': mirror})\n", " for d in h1:\n", " h3 = d.find_all(\"div\",attrs={\"class\",\"date\"})\n", " for date in h3:\n", " waktu = date.text.strip().split('wont-fix')\n", " w.append({'Date' : waktu})" ] }, { "cell_type": "code", "execution_count": 285, "metadata": {}, "outputs": [], "source": [ "df1 = pd.DataFrame(data)\n", "df2 = pd.DataFrame(w)" ] }, { "cell_type": "code", "execution_count": 274, "metadata": {}, "outputs": [], "source": [ "h1_disclosure = pd.merge(df1, df2, left_index=True, right_index=True)" ] }, { "cell_type": "code", "execution_count": 286, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Company | \n", "Title | \n", "Submit | \n", "link | \n", "Date | \n", "
---|---|---|---|---|---|
11805 | \n", "HackerOne | \n", "CSP not consistently applied | \n", "janpaul123 | \n", "https://hackerone.com/reports/321 | \n", "[30 Nov 2013] | \n", "
11806 | \n", "HackerOne | \n", "Pixel flood attack | \n", "dutchgraa | \n", "https://hackerone.com/reports/390 | \n", "[30 Nov 2013] | \n", "
11807 | \n", "HackerOne | \n", "GIF flooding | \n", "dutchgraa | \n", "https://hackerone.com/reports/400 | \n", "[30 Nov 2013] | \n", "
11808 | \n", "Ruby | \n", "Ruby: Heap Overflow in Floating Point Parsing | \n", "charliesome | \n", "https://hackerone.com/reports/499 | \n", "[22 Nov 2013] | \n", "
11809 | \n", "The Internet | \n", "OpenSSH: Memory corruption in AES-GCM support | \n", "markus | \n", "https://hackerone.com/reports/500 | \n", "[07 Nov 2013] | \n", "
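The `plotly.express` import at the top is never used in this section. As one possible next step (an assumption on my part, not something the original notebook shows), a quick bar chart of the programs that appear most often in the timeline:

```python
# Sketch only: the original notebook imports plotly.express but stops at
# the merge. This charts the ten companies with the most disclosed reports.
top = h1_disclosure['Company'].value_counts().head(10).reset_index()
top.columns = ['Company', 'Reports']  # normalize column names across pandas versions
fig = px.bar(top, x='Company', y='Reports',
             title='Top 10 programs in the h1.nobbd.de disclosure timeline')
fig.show()
```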