February 21, 2023

WhatMobile
Product Scraping


Python Code

#importing all the required libraries

import urllib.parse

import pandas as pd
import requests
from bs4 import BeautifulSoup

#website to scrape

website = 'https://www.whatmobile.com.pk/0_to_150001_Mobiles'

# creating connection with website
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(website, timeout=30)
# Stop with an HTTPError on a 4xx/5xx reply instead of scraping an error page.
response.raise_for_status()

# parsing the html code

soup = BeautifulSoup(response.content, 'html.parser')

#storing html code of products in a result variable

results = soup.find_all('td', {'class': 'BiggerText'})

# Preview the first product as a sanity check on the selectors.
# Guarded so that an empty result set or a missing tag does not crash the
# script, and printed so the values are visible outside a notebook.
if results:
    first_anchor = results[0].find('a', {'class': 'BiggerText'})
    first_price_tag = results[0].find('span', {'class': 'PriceFont'})

    # fetching name of first product
    if first_anchor is not None:
        print(first_anchor.get_text().replace('\n', ''))

    # fetching price of first product
    if first_price_tag is not None:
        print(first_price_tag.get_text().replace('\n', ''))

#Storing the name, price and relative url of every product in lists

def _text_or_na(tag):
    """Return the tag's text with newlines removed, or 'N.A' if the tag is missing."""
    if tag is None:
        return 'N.A'
    return tag.get_text().replace('\n', '')


product_name = []
product_price = []
relative_url = []

for result in results:
    # find() returns None when the cell has no matching tag; that case is
    # handled explicitly instead of hiding every error behind a bare except.
    anchor = result.find('a', {'class': 'BiggerText'})
    price_tag = result.find('span', {'class': 'PriceFont'})

    # name
    product_name.append(_text_or_na(anchor))

    # price
    product_price.append(_text_or_na(price_tag))

    # relative_url
    relative_url.append(anchor.get('href') if anchor is not None else 'N.A')

# creating root URL
root_url = 'https://www.whatmobile.com.pk'

# Combining URLs
# 'N.A' marks a product whose link was not found; keep the marker as-is
# rather than joining it onto the root and producing a bogus URL.
url_combined = [
    link if link == 'N.A' else urllib.parse.urljoin(root_url, link)
    for link in relative_url
]

#creating a dataframe and storing the data in a variable

# One row per product; the three lists are built in lockstep, so they have equal length.
product_overview = pd.DataFrame({
    'Name': product_name,
    'Price': product_price,
    'Url': url_combined,
})

# saving to Excel File
# index must be the boolean False: the string 'False' is truthy, so pandas
# would still write the row-index column into the sheet.
product_overview.to_excel('whatmobile.xlsx', index=False)