{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "view-in-github" }, "source": [ "## **Web Scraping** of **Progrez.cloud**\n", "```\n", "Written in Python 3\n", "for Jupyter Notebook\n", "\n", "Works in VS Code\n", "Not intended for Google Colab\n", "\n", "Scraped site:\n", "https://progrez.cloud\n", "\n", "Code by Natasya\n", "medium.com/@liontin\n", "```" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "QxQcBXdJEBYk" }, "outputs": [], "source": [ "from urllib.request import Request, urlopen\n", "import pandas as pd\n", "import numpy as np\n", "import openpyxl\n", "import plotly.express as px" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8Qm1gHsc9TfZ", "outputId": "b79438f0-e3ad-47ab-c98e-b5a74f4c1a7d" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "*********************\n", "0\n", " Task Author Done at Progress\n", "0 Hardening jMozac NaN 50%\n", "1 Upgrade YarpCipher 3.0 to 3.5 jMozac NaN NaN\n", "2 ADEM jMozac 22 months ago NaN\n", "3 Dashboard jMozac NaN 80%\n", "4 History of mounth finish task Anak HONDA NaN NaN\n", ".. ... ... ... 
...\n", "159 File Attachment jMozac jMozac NaN\n", "160 File Attachment jMozac jMozac NaN\n", "161 Blog jMozac 12 months ago NaN\n", "162 File Attachment jMozac jMozac NaN\n", "163 Change Templates jMozac 23 months ago NaN\n", "\n", "[164 rows x 4 columns]\n" ] } ], "source": [
"# Fetch the project page; a browser-like User-Agent header is sent with the request.\n",
"url = 'http://progrez.cloud/project/epf5j-progrezcloud-v20'\n",
"req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})\n",
"\n",
"# Use a context manager so the HTTP connection is always closed, and a timeout\n",
"# so a stalled server cannot hang the kernel indefinitely.\n",
"with urlopen(req, timeout=30) as response:\n",
"    webpage = response.read()\n",
"\n",
"# read_html returns a list with one DataFrame per HTML table found in the page.\n",
"progrez = pd.read_html(webpage)\n",
"for idx, table in enumerate(progrez):\n",
"    print(\"*********************\")\n",
"    print(idx)\n",
"    print(table)\n",
"\n",
"# The cells below analyse `task`. Bind it explicitly instead of relying on a\n",
"# leaked loop variable; the last table is chosen because that is what the loop\n",
"# variable previously held once the loop finished.\n",
"task = progrez[-1]"
] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 542 }, "id": "r-Qeb7qN943w", "outputId": "b2939140-7de1-4f23-dcd0-10a86af9467f" }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "
\n", "
\n", "\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
"# Bar chart of how many rows share each value of the 'Task' column.\n",
"task_counts = task['Task'].value_counts()\n",
"fig = px.bar(task_counts, title=\"Data Project Berdasarkan Task\")\n",
"fig.show()"
] }, { "cell_type": "code", "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 542 }, "id": "jToj9iUiEYLc", "outputId": "05699d69-174a-4255-af99-d6b66e2e9b1c" }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "
\n", "
\n", "\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
"# Bar chart of how many rows share each value of the 'Author' column.\n",
"author_counts = task['Author'].value_counts()\n",
"fig = px.bar(author_counts, title=\"Data Project Berdasarkan Author\")\n",
"fig.show()"
] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 542 }, "id": "fcm4Ng7uGTKb", "outputId": "243b6d54-8966-4bf7-e990-a4c5e1db9623" }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "
\n", "
\n", "\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
"# Bar chart of how many rows share each value of the 'Progress' column.\n",
"progress_counts = task['Progress'].value_counts()\n",
"fig = px.bar(progress_counts, title=\"Data Project Berdasarkan Progress\")\n",
"fig.show()"
] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2Ut9XipxzJBS", "outputId": "6048babb-b55f-416a-ffee-afdb35b28f39" }, "outputs": [ { "data": { "text/plain": [ "100% 4\n", "0% 2\n", "50% 1\n", "80% 1\n", "60% 1\n", "85% 1\n", "40% 1\n", "Name: Progress, dtype: int64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Frequency table of the 'Progress' values, limited to the first 40 entries.\n",
"progress_table = task['Progress'].value_counts()\n",
"progress_table.head(40)"
] }, { "cell_type": "code", "execution_count": 17, "metadata": { "id": "sknfS_Kr8fh5" }, "outputs": [], "source": [
"# Save the scraped table as an Excel workbook in the current working directory.\n",
"task.to_excel('progrez.xlsx')"
] } ], "metadata": { "colab": { "authorship_tag": "ABX9TyPWIV7DIrXEJ6h/FtFhJbvQ", "include_colab_link": true, "name": "WEB SCRAPPING-progrez.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }