# February 21, 2023
# WhatMobile
# Product Scraping
# Python Code
#importing all the required libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
import urllib.parse
#website to scrape
website = 'https://www.whatmobile.com.pk/0_to_150001_Mobiles'
#creating connection with website
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(website, timeout=30)
# Fail fast on a 4xx/5xx answer instead of silently parsing an error page.
response.raise_for_status()
#parsing the html code
soup = BeautifulSoup(response.content, 'html.parser')
#storing html code of products in a result variable
# Each product is looked up as a <td class="BiggerText"> cell.
results = soup.find_all('td', {'class': 'BiggerText'})
#fetching name and price of first product (quick sanity check of the selectors)
# Guarded so that an empty result set or a missing tag does not crash the
# script; the values are printed because a bare expression shows nothing
# when this runs as a script.
if results:
    first_name_tag = results[0].find('a', {'class': 'BiggerText'})
    first_price_tag = results[0].find('span', {'class': 'PriceFont'})
    if first_name_tag is not None:
        print(first_name_tag.get_text().replace('\n', ''))
    if first_price_tag is not None:
        print(first_price_tag.get_text().replace('\n', ''))
#Storing name,price and url in a list
# The three lists are filled in lockstep: index i of each list describes the
# same product, and 'N.A' marks a field whose tag was not found.
product_name = []
product_price = []
relative_url = []
for result in results:
    # The anchor carries both the product name (its text) and the link (href);
    # find() returns None when the tag is absent.
    anchor = result.find('a', {'class': 'BiggerText'})
    price_tag = result.find('span', {'class': 'PriceFont'})
    #name
    if anchor is not None:
        product_name.append(anchor.get_text().replace('\n', ''))
    else:
        product_name.append('N.A')
    #price
    if price_tag is not None:
        product_price.append(price_tag.get_text().replace('\n', ''))
    else:
        product_price.append('N.A')
    #relative_url
    if anchor is not None:
        # .get() yields None when the anchor has no href attribute.
        relative_url.append(anchor.get('href'))
    else:
        relative_url.append('N.A')
#creating root URL
root_url = 'https://www.whatmobile.com.pk'
#Combining URLs
# Relative links are resolved against the site root. A missing link (the
# 'N.A' placeholder, None or an empty string) stays 'N.A' instead of being
# turned into a bogus URL such as 'https://www.whatmobile.com.pk/N.A'.
url_combined = [
    urllib.parse.urljoin(root_url, link) if link and link != 'N.A' else 'N.A'
    for link in relative_url
]
#creating dataframe and store data in variable
# One row per product; the three input lists must have the same length.
product_overview = pd.DataFrame({
    'Name': product_name,
    'Price': product_price,
    'Url': url_combined,
})
#saving to Excel File
# index must be the boolean False: the string 'False' is truthy, so pandas
# would still write the row-index column.
product_overview.to_excel('whatmobile.xlsx', index=False)