import pandas as pd
from bs4 import BeautifulSoup
import requests
# Page listing the MSDS course descriptions to scrape.
url = "https://msds-stat.rutgers.edu/msds-academics/msds-coursedesc"
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of carrying an error page into later cells.
response.raise_for_status()
# Bare expression so the notebook displays the response status.
response
<Response [200]>
Scrape the list of courses given at https://msds-stat.rutgers.edu/msds-academics/msds-coursedesc, and tidy the data into a pandas DataFrame.
import pandas as pd
from bs4 import BeautifulSoup
import requests
# Page listing the MSDS course descriptions to scrape.
url = "https://msds-stat.rutgers.edu/msds-academics/msds-coursedesc"
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page further down.
response.raise_for_status()
# Bare expression so the notebook displays the response status.
response
<Response [200]>
'<!DOCTYPE html>\n<html lang="en-gb" dir="ltr">\n<head>\n\t<meta charset="utf-8">\n\t<meta name="viewport" content="width=device-width, initial-scale=1">\n\t<meta name="description" content="The School of Arts and Sciences, Rutgers, The State University of New Jersey">\n\t<meta name="generator" content="Joomla! - Open Source Content Management">\n\t<title>Course Descriptions</title>\n\t<link href="/msds-academics/msds-coursedesc?format=feed&type=rss" rel="alternate" type="application/rss+xml" title="Course Descriptions">\n\t<link href="/msds-academics/msds-coursedesc?format=feed&type=atom" rel="alternate" type="application/atom+xml" title="Course Descriptions">\n\t<link href="/media/templates/site/cassiopeia_sas/images/favicon.ico" rel="alternate icon" type="image/vnd.microsoft.icon">\n\t<link href="/media/system/images/joomla-favicon-pinned.svg" rel="mask-icon" color="#000">\n\n\t<link href="/media/system/css/joomla-fontawesome.min.css?b3c00803f7b963e4a421907d658c8bb8" rel="lazy-stylesheet" /><noscri'
# Parse the downloaded HTML; `soup` is the tree that the lookups below search.
# "html.parser" is Python's built-in parser, so no extra dependency is needed.
soup = BeautifulSoup(response.text, "html.parser")
# Every <div class="blog-item"> container on the page is collected here.
elements = soup.find_all("div", class_='blog-item')
# Take the first container as a sample for exploring the markup.
element = elements[0]
# The <h2> holds the heading text, e.g. course number, title and credits.
element.find('h2').text.strip()
'16:198:512 Introduction to Data Structures and Algorithms (3)'
'An introduction for students in other degree programs on Data Structures and Algorithms. The course studies a variety of useful algorithms and analyzes their complexity; students will gain insight into principles and data structures useful in algorithm design.\xa0Syllabus.pdf'
['16:198:512',
'Introduction',
'to',
'Data',
'Structures',
'and',
'Algorithms',
'(3)']
['Introduction', 'to', 'Data', 'Structures', 'and', 'Algorithms']
'Introduction to Data Structures and Algorithms'
def _parse_course(element):
    """Turn one ``blog-item`` element into a flat record for the DataFrame.

    The ``<h2>`` heading is expected to look like
    ``'16:198:512 Introduction to Data Structures and Algorithms (3)'``:
    the first token is the course number, the last token is the credit
    count in parentheses, and everything in between is the title.

    Args:
        element: A BeautifulSoup tag for one course; it must contain an
            ``<h2>`` heading. A missing ``<p>`` yields an empty description.

    Returns:
        A dict with the keys ``title``, ``courseNumber``, ``credits`` and
        ``description`` (all strings).
    """
    # Tokenise the heading once; split() with no argument also collapses
    # repeated spaces and non-breaking spaces that split(' ') would keep.
    tokens = element.find('h2').text.split()
    paragraph = element.find('p')
    return {
        'title': ' '.join(tokens[1:-1]),
        'courseNumber': tokens[0],
        # Strip the parentheses rather than taking a single character so
        # values such as '(1.5)' or '(12)' are not truncated.
        'credits': tokens[-1].strip('()'),
        'description': paragraph.text.strip() if paragraph is not None else '',
    }


# One record per course container on the page.
data = [
    _parse_course(element)
    for element in soup.find_all("div", class_='blog-item')
]
pd.DataFrame(data)
 | title | courseNumber | credits | description |
---|---|---|---|---|
0 | Introduction to Data Structures and Algorithms | 16:198:512 | 3 | An introduction for students in other degree p... |
1 | Linear Programming | 16:198:521 | 3 | This course introduces modeling concepts, theo... |
2 | Database Management Systems | 16:198:539 | 3 | Implementing components of relational database... |
3 | Advanced Database Management | 16:198:541 | 3 | This course focuses on advanced topics in Data... |
4 | Convex Optimization for Engineering Applications | 16:332:509 | 3 | The course develops the necessary theory, algo... |
5 | Statistical Learning for Data Science | 16:954:534 | 3 | Advanced statistical learning methods are esse... |
6 | Statistical Models and Computing | 16:954:567 | 3 | This course is about advanced statistical mode... |
7 | Advanced Analytics using Statistical Software | 16:954:577 | 3 | Modeling and analysis of data, usually very la... |
8 | Probability and Statistical Inference for Data... | 16:954:581 | 3 | The study of probabilistic and inferential too... |
9 | Regression and Time Series Analysis for Data S... | 16:954:596 | 3 | This course introduces regression methods, sta... |
10 | Data Wrangling and Husbandry | 16:954:597 | 3 | This course provides an introduction to the pr... |
11 | Advanced Simulation Methods | 16:958:587 | 3 | The emphasis of this course will be on Modern ... |
12 | Financial Data Mining | 16:958:588 | 3 | Databases and data warehousing, exploratory da... |
13 | Advanced Programming for Financial Statistics ... | 16:958:589 | 3 | This course covers the basic concepts of objec... |