{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "view-in-github"
   },
   "source": [
    "\n",
    "\n",
    "## **Web Scraping** in **Bukalapak** \n",
    "```\n",
    "Code using python3\n",
    "for Jupyter Notebook\n",
    "\n",
    "Work in VS Code\n",
    "Not for google colab\n",
    "\n",
    "Link Web Scraping :\n",
    "https://bukalapak.com\n",
    "\n",
    "Code By Natasya \n",
    "medium.com/@liontin\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "8Qm1gHsc9TfZ"
   },
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import json\n",
    "import os\n",
    "from getpass import getpass\n",
    "from urllib.parse import urlencode\n",
    "from urllib.request import Request, urlopen\n",
    "\n",
    "# Third-party\n",
    "import numpy as np\n",
    "import openpyxl\n",
    "import pandas as pd\n",
    "import plotly.express as px"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Endpoint and request settings for the Bukalapak product-search API.\n",
    "API_URL = 'https://api.bukalapak.com/multistrategy-products'\n",
    "PAGE_SIZE = 50  # value sent as the `limit` query parameter\n",
    "# Environment variable the access token is read from, so the credential\n",
    "# does not have to be stored in the notebook.\n",
    "TOKEN_ENV_VAR = 'BUKALAPAK_ACCESS_TOKEN'\n",
    "\n",
    "\n",
    "def build_search_url(keyword, page, access_token):\n",
    "    \"\"\"Return the search URL for one result page.\n",
    "\n",
    "    The query string is produced by urlencode, so a keyword containing\n",
    "    spaces or characters such as '&' or '#' is escaped instead of\n",
    "    corrupting the URL.\n",
    "    \"\"\"\n",
    "    query = urlencode({\n",
    "        'keywords': keyword,\n",
    "        'limit': PAGE_SIZE,\n",
    "        'offset': 0,\n",
    "        'facet': 'true',\n",
    "        'page': page,\n",
    "        'filter_non_popular_section': 'true',\n",
    "        'access_token': access_token,\n",
    "    })\n",
    "    return API_URL + '?' + query\n",
    "\n",
    "\n",
    "def fetch_products(keyword, page, access_token):\n",
    "    \"\"\"Request one result page and return the value under the response's 'data' key.\n",
    "\n",
    "    Raises whatever urlopen raises on a network/HTTP failure, and KeyError\n",
    "    if the decoded JSON has no 'data' key.\n",
    "    \"\"\"\n",
    "    req = Request(build_search_url(keyword, page, access_token),\n",
    "                  headers={'User-Agent': 'Mozilla/5.0'})\n",
    "    # The context manager closes the HTTP response even if reading fails.\n",
    "    with urlopen(req) as response:\n",
    "        payload = json.loads(response.read().decode('utf-8'))\n",
    "    return payload['data']\n",
    "\n",
    "\n",
    "def product_row(number, product):\n",
    "    \"\"\"Flatten one product record into a list.\n",
    "\n",
    "    Order: number, store name, product name, condition, price,\n",
    "    average rating, stock, sold count, link. The 'https://' prefix is\n",
    "    removed from the link. Raises KeyError if the record lacks a field\n",
    "    read here.\n",
    "    \"\"\"\n",
    "    return [\n",
    "        number,\n",
    "        product['store']['name'],\n",
    "        product['name'],\n",
    "        product['condition'],\n",
    "        product['price'],\n",
    "        product['rating']['average_rate'],\n",
    "        product['stock'],\n",
    "        product['stats']['sold_count'],\n",
    "        product['url'].replace('https://', ''),\n",
    "    ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "sf8mPMS9tpxJ"
   },
   "outputs": [],
   "source": [
    "key = input('masukkan keyword :')\n",
    "pagestart = input('Page Number (start) :')  # first page to fetch\n",
    "pagestop = input('Page Number (stop) :')  # last page to fetch (inclusive)\n",
    "start = int(pagestart)\n",
    "stop = int(pagestop)\n",
    "\n",
    "# Take the token from the environment; if it is unset or empty, ask for it\n",
    "# with getpass so it is not written into the notebook.\n",
    "access_token = os.environ.get(TOKEN_ENV_VAR) or getpass('Bukalapak access token :')\n",
    "\n",
    "data = []\n",
    "count = 0  # running row number across all pages; the first product gets 1\n",
    "for page in range(start, stop + 1):\n",
    "    for p in fetch_products(key, page, access_token):\n",
    "        count += 1\n",
    "        data.append(product_row(count, p))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "LqgT8a0pUhXS"
   },
   "outputs": [],
   "source": [
    "# One row per scraped product; column order matches the lists appended to `data`.\n",
    "# NOTE(review): 'Reting' looks like a typo for 'Rating'; it is left unchanged because it\n",
    "# is a runtime column name and the saved output of the next cell uses it.\n",
    "bukalapak = pd.DataFrame(data, columns=['No', 'Penjual', 'Title', 'Kondisi', 'Harga', 'Reting', 'Tersedia', 'Terjual', 'link'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 206
    },
    "id": "QqaIB7e2XEkc",
    "outputId": "afb5c130-35ab-43d7-aaab-1399c14e141d"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | No | \n", "Penjual | \n", "Title | \n", "Kondisi | \n", "Harga | \n", "Reting | \n", "Tersedia | \n", "Terjual | \n", "link | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "superstore95 | \n", "Apple iPhone X 256GB | \n", "Baru | \n", "4499000 | \n", "4.9 | \n", "14 | \n", "54 | \n", "www.bukalapak.com/p/handphone/hp-smartphone/ip... | \n", "
1 | \n", "2 | \n", "Kino Phone Cell | \n", "iphone 11 64GB 128GB 256GB Bekas Second Ori 64... | \n", "Bekas | \n", "6550000 | \n", "4.9 | \n", "96 | \n", "326 | \n", "www.bukalapak.com/p/handphone/hp-smartphone/ip... | \n", "
2 | \n", "3 | \n", "superstore95 | \n", "Apple iPhone XR 64GB 128GB 256GB - Garansi 1 t... | \n", "Baru | \n", "4999000 | \n", "5.0 | \n", "5 | \n", "24 | \n", "www.bukalapak.com/p/handphone/hp-smartphone/ip... | \n", "
3 | \n", "4 | \n", "JD.ID Official Store | \n", "Iphone 12 Pro 256GB - Gold | \n", "Baru | \n", "17899000 | \n", "0.0 | \n", "4 | \n", "0 | \n", "www.bukalapak.com/p/handphone/hp-smartphone/ip... | \n", "
4 | \n", "5 | \n", "JD.ID Official Store | \n", "Iphone 13 mini 256GB - Blue | \n", "Baru | \n", "15719000 | \n", "0.0 | \n", "4 | \n", "0 | \n", "www.bukalapak.com/p/handphone/hp-smartphone/ip... | \n", "