{ "cells": [ { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "#Necessary python libraries import (before import step, the installation of library is required in python environment)\n", "\n", "import requests #library for webscrapping\n", "from bs4 import BeautifulSoup #library for webscrapping\n", "import pandas as pd #basic library\n", "import re #library for regex (word matching)\n", "import datetime #library to use DateTime method" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "response: \n" ] } ], "source": [ "# URL of the webpage with the ongoing events\n", "url = \"https://en.wikipedia.org/wiki/Portal:Current_events\"\n", "\n", "# Send an HTTP GET request to the URL\n", "response = requests.get(url)\n", "print(\"response:\", response)\n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Headline Description Timestamp\n", "0 Disasters 2023 Atlantic hurricane season\\n2023 Pacific h... Economics\n", "1 Economics 2020–2023 global chip shortage\\n2021–2023 glob... Politics\n", "2 Politics Armenian protests\\nBelarus–European Union bord... edit section\n" ] } ], "source": [ "# Check if the response code is 200\n", "if response.status_code == 200:\n", " # Parse the HTML content with BeautifulSoup\n", " soup = BeautifulSoup(response.content, 'html.parser')\n", "\n", " # Find the div with role=\"region\" and aria-labelledby=\"Ongoing_events\"\n", " ongoing_events_div = soup.find('div', {'role': 'region', 'aria-labelledby': 'Ongoing_events'})\n", "\n", " if ongoing_events_div:\n", " headlines = []\n", " descriptions = []\n", " timestamps = []\n", "\n", " # Find all the h3 elements within the ongoing_events_div\n", " h3_elements = ongoing_events_div.find_all('h3')\n", "\n", " for h3 in h3_elements:\n", " # Get the headline text from the span with class \"mw-headline\"\n", " headline = h3.find('span', class_='mw-headline')\n", " if headline:\n", " headlines.append(headline.text.strip())\n", " else:\n", " headlines.append(\"No Headline\")\n", " # Get the description text from the following sibling (next_element)\n", " description = h3.find_next_sibling().text.strip()\n", " if description:\n", " descriptions.append(description)\n", " else:\n", " descriptions.append(\"No Description\")\n", "\n", " # Get the timestamp from the following sibling of the description\n", " timestamp = h3.find_next_sibling().find_next_sibling().text.strip()\n", " if timestamp:\n", " timestamps.append(timestamp)\n", " else:\n", " timestamps.append(\"No Timestamp\")\n", "\n", " # Create a DataFrame with the columns\n", " df = pd.DataFrame({\"Headline\": headlines, \"Description\": descriptions, \"Timestamp\": timestamps})\n", "\n", " # Display the DataFrame\n", " print(df)\n", " else:\n", " print(\"Couldn't find the 'mw-collapsible current-events-sidebar' div on the page.\")\n", "else:\n", " print(f\"HTTP request failed with status code {response.status_code}.\")\n" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineDescriptionTimestamp
0Disasters2023 Atlantic hurricane season\\n2023 Pacific h...Economics
1Economics2020–2023 global chip shortage\\n2021–2023 glob...Politics
2PoliticsArmenian protests\\nBelarus–European Union bord...edit section
\n", "
" ], "text/plain": [ " Headline Description Timestamp\n", "0 Disasters 2023 Atlantic hurricane season\\n2023 Pacific h... Economics\n", "1 Economics 2020–2023 global chip shortage\\n2021–2023 glob... Politics\n", "2 Politics Armenian protests\\nBelarus–European Union bord... edit section" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Another way of displaying the dataframe\n", "df" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineTimestampDescription
0DisastersEconomics2023 Atlantic hurricane season
0DisastersEconomics2023 Pacific hurricane season
0DisastersEconomics2023 Pacific typhoon season
0DisastersEconomicsOpioid epidemic0(United States)
1EconomicsPolitics2020–2023 global chip shortage
1EconomicsPolitics2021–2023 global energy crisis
1EconomicsPolitics2021–2023 inflation surge
1EconomicsPolitics2022–2023 food crises
1EconomicsPoliticsGreat Resignation
1EconomicsPoliticsArgentine monetary crisis
1EconomicsPoliticsLebanese liquidity crisis
1EconomicsPoliticsPakistani economic crisis
1EconomicsPoliticsSouth African energy crisis
1EconomicsPoliticsSri Lankan economic crisis
1EconomicsPoliticsTurkish economic crisis
1EconomicsPoliticsUnited Kingdom cost-of-living crisis
2Politicsedit sectionArmenian protests
2Politicsedit sectionBelarus–European Union border crisis
2Politicsedit sectionFrench pension reform unrest
2Politicsedit sectionHaitian crisis
2Politicsedit sectionImpeachment inquiry against Joe Biden
2Politicsedit sectionIsraeli judicial reform protests
2Politicsedit sectionLibyan crisis
2Politicsedit sectionMyanmar protests
2Politicsedit sectionNigerien crisis
2Politicsedit sectionNorth Kosovo crisis
2Politicsedit sectionPakistan political unrest
2Politicsedit sectionPeruvian protests and political crisis
2Politicsedit sectionUkrainian refugee crisis
2Politicsedit sectionUnited Kingdom railway strikes
2Politicsedit sectionVenezuelan crisis
\n", "
" ], "text/plain": [ " Headline Timestamp Description\n", "0 Disasters Economics 2023 Atlantic hurricane season\n", "0 Disasters Economics 2023 Pacific hurricane season\n", "0 Disasters Economics 2023 Pacific typhoon season\n", "0 Disasters Economics Opioid epidemic0(United States)\n", "1 Economics Politics 2020–2023 global chip shortage\n", "1 Economics Politics 2021–2023 global energy crisis\n", "1 Economics Politics 2021–2023 inflation surge\n", "1 Economics Politics 2022–2023 food crises\n", "1 Economics Politics Great Resignation\n", "1 Economics Politics Argentine monetary crisis\n", "1 Economics Politics Lebanese liquidity crisis\n", "1 Economics Politics Pakistani economic crisis\n", "1 Economics Politics South African energy crisis\n", "1 Economics Politics Sri Lankan economic crisis\n", "1 Economics Politics Turkish economic crisis\n", "1 Economics Politics United Kingdom cost-of-living crisis\n", "2 Politics edit section Armenian protests\n", "2 Politics edit section Belarus–European Union border crisis\n", "2 Politics edit section French pension reform unrest\n", "2 Politics edit section Haitian crisis\n", "2 Politics edit section Impeachment inquiry against Joe Biden\n", "2 Politics edit section Israeli judicial reform protests\n", "2 Politics edit section Libyan crisis\n", "2 Politics edit section Myanmar protests\n", "2 Politics edit section Nigerien crisis\n", "2 Politics edit section North Kosovo crisis\n", "2 Politics edit section Pakistan political unrest\n", "2 Politics edit section Peruvian protests and political crisis\n", "2 Politics edit section Ukrainian refugee crisis\n", "2 Politics edit section United Kingdom railway strikes\n", "2 Politics edit section Venezuelan crisis" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Currently only 3 rows, one for each Headline\n", "# Next to split the clubbed values in the 'Description' Column\n", "\n", "# Split the 'Description' column by newline character ('\\n') and stack the resulting Series\n", "split_descriptions = df['Description'].str.split('\\n', expand=True).stack()\n", "\n", "# Reset the index to match the original DataFrame's structure\n", "split_descriptions = split_descriptions.reset_index(level=1, drop=True)\n", "\n", "# Rename the Series to 'Description' to match the original DataFrame\n", "split_descriptions.name = 'Description'\n", "\n", "# Drop the original 'Description' column from the original DataFrame\n", "df = df.drop('Description', axis=1)\n", "\n", "# Concatenate the original DataFrame with the split 'Description' Series\n", "df = pd.concat([df, split_descriptions], axis=1)\n", "\n", "df" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineTimestampDescriptionEvent Start Year
0DisastersEconomicsAtlantic hurricane season2023
0DisastersEconomicsPacific hurricane season2023
0DisastersEconomicsPacific typhoon season2023
0DisastersEconomicsOpioid epidemic0(United States)None
1EconomicsPolitics– global chip shortage2020
1EconomicsPolitics– global energy crisis2021
1EconomicsPolitics– inflation surge2021
1EconomicsPolitics– food crises2022
1EconomicsPoliticsGreat ResignationNone
1EconomicsPoliticsArgentine monetary crisisNone
1EconomicsPoliticsLebanese liquidity crisisNone
1EconomicsPoliticsPakistani economic crisisNone
1EconomicsPoliticsSouth African energy crisisNone
1EconomicsPoliticsSri Lankan economic crisisNone
1EconomicsPoliticsTurkish economic crisisNone
1EconomicsPoliticsUnited Kingdom cost-of-living crisisNone
2Politicsedit sectionArmenian protestsNone
2Politicsedit sectionBelarus–European Union border crisisNone
2Politicsedit sectionFrench pension reform unrestNone
2Politicsedit sectionHaitian crisisNone
2Politicsedit sectionImpeachment inquiry against Joe BidenNone
2Politicsedit sectionIsraeli judicial reform protestsNone
2Politicsedit sectionLibyan crisisNone
2Politicsedit sectionMyanmar protestsNone
2Politicsedit sectionNigerien crisisNone
2Politicsedit sectionNorth Kosovo crisisNone
2Politicsedit sectionPakistan political unrestNone
2Politicsedit sectionPeruvian protests and political crisisNone
2Politicsedit sectionUkrainian refugee crisisNone
2Politicsedit sectionUnited Kingdom railway strikesNone
2Politicsedit sectionVenezuelan crisisNone
\n", "
" ], "text/plain": [ " Headline Timestamp Description \\\n", "0 Disasters Economics Atlantic hurricane season \n", "0 Disasters Economics Pacific hurricane season \n", "0 Disasters Economics Pacific typhoon season \n", "0 Disasters Economics Opioid epidemic0(United States) \n", "1 Economics Politics – global chip shortage \n", "1 Economics Politics – global energy crisis \n", "1 Economics Politics – inflation surge \n", "1 Economics Politics – food crises \n", "1 Economics Politics Great Resignation \n", "1 Economics Politics Argentine monetary crisis \n", "1 Economics Politics Lebanese liquidity crisis \n", "1 Economics Politics Pakistani economic crisis \n", "1 Economics Politics South African energy crisis \n", "1 Economics Politics Sri Lankan economic crisis \n", "1 Economics Politics Turkish economic crisis \n", "1 Economics Politics United Kingdom cost-of-living crisis \n", "2 Politics edit section Armenian protests \n", "2 Politics edit section Belarus–European Union border crisis \n", "2 Politics edit section French pension reform unrest \n", "2 Politics edit section Haitian crisis \n", "2 Politics edit section Impeachment inquiry against Joe Biden \n", "2 Politics edit section Israeli judicial reform protests \n", "2 Politics edit section Libyan crisis \n", "2 Politics edit section Myanmar protests \n", "2 Politics edit section Nigerien crisis \n", "2 Politics edit section North Kosovo crisis \n", "2 Politics edit section Pakistan political unrest \n", "2 Politics edit section Peruvian protests and political crisis \n", "2 Politics edit section Ukrainian refugee crisis \n", "2 Politics edit section United Kingdom railway strikes \n", "2 Politics edit section Venezuelan crisis \n", "\n", " Event Start Year \n", "0 2023 \n", "0 2023 \n", "0 2023 \n", "0 None \n", "1 2020 \n", "1 2021 \n", "1 2021 \n", "1 2022 \n", "1 None \n", "1 None \n", "1 None \n", "1 None \n", "1 None \n", "1 None \n", "1 None \n", "1 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None \n", "2 None " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Function to extract the year from the description\n", "def extract_year(description):\n", " # Use regular expressions to find a 4-digit year in the description\n", " year_match = re.search(r'\\b\\d{4}\\b', description)\n", " if year_match:\n", " return year_match.group(0)\n", " else:\n", " return None\n", "\n", "# Create a new column 'Event Year' by applying the extract_year function to the 'Description' column\n", "df['Event Start Year'] = df['Description'].apply(extract_year)\n", "\n", "# Remove the year value from the 'Description' column\n", "df['Description'] = df['Description'].apply(lambda x: re.sub(r'\\b\\d{4}\\b', '', x) if pd.notna(x) else x)\n", "\n", "df" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineDescriptionEvent Start Year
0DisastersAtlantic hurricane season2023
0DisastersPacific hurricane season2023
0DisastersPacific typhoon season2023
0DisastersOpioid epidemic0(United States)None
1Economics– global chip shortage2020
1Economics– global energy crisis2021
1Economics– inflation surge2021
1Economics– food crises2022
1EconomicsGreat ResignationNone
1EconomicsArgentine monetary crisisNone
1EconomicsLebanese liquidity crisisNone
1EconomicsPakistani economic crisisNone
1EconomicsSouth African energy crisisNone
1EconomicsSri Lankan economic crisisNone
1EconomicsTurkish economic crisisNone
1EconomicsUnited Kingdom cost-of-living crisisNone
2PoliticsArmenian protestsNone
2PoliticsBelarus–European Union border crisisNone
2PoliticsFrench pension reform unrestNone
2PoliticsHaitian crisisNone
2PoliticsImpeachment inquiry against Joe BidenNone
2PoliticsIsraeli judicial reform protestsNone
2PoliticsLibyan crisisNone
2PoliticsMyanmar protestsNone
2PoliticsNigerien crisisNone
2PoliticsNorth Kosovo crisisNone
2PoliticsPakistan political unrestNone
2PoliticsPeruvian protests and political crisisNone
2PoliticsUkrainian refugee crisisNone
2PoliticsUnited Kingdom railway strikesNone
2PoliticsVenezuelan crisisNone
\n", "
" ], "text/plain": [ " Headline Description Event Start Year\n", "0 Disasters Atlantic hurricane season 2023\n", "0 Disasters Pacific hurricane season 2023\n", "0 Disasters Pacific typhoon season 2023\n", "0 Disasters Opioid epidemic0(United States) None\n", "1 Economics – global chip shortage 2020\n", "1 Economics – global energy crisis 2021\n", "1 Economics – inflation surge 2021\n", "1 Economics – food crises 2022\n", "1 Economics Great Resignation None\n", "1 Economics Argentine monetary crisis None\n", "1 Economics Lebanese liquidity crisis None\n", "1 Economics Pakistani economic crisis None\n", "1 Economics South African energy crisis None\n", "1 Economics Sri Lankan economic crisis None\n", "1 Economics Turkish economic crisis None\n", "1 Economics United Kingdom cost-of-living crisis None\n", "2 Politics Armenian protests None\n", "2 Politics Belarus–European Union border crisis None\n", "2 Politics French pension reform unrest None\n", "2 Politics Haitian crisis None\n", "2 Politics Impeachment inquiry against Joe Biden None\n", "2 Politics Israeli judicial reform protests None\n", "2 Politics Libyan crisis None\n", "2 Politics Myanmar protests None\n", "2 Politics Nigerien crisis None\n", "2 Politics North Kosovo crisis None\n", "2 Politics Pakistan political unrest None\n", "2 Politics Peruvian protests and political crisis None\n", "2 Politics Ukrainian refugee crisis None\n", "2 Politics United Kingdom railway strikes None\n", "2 Politics Venezuelan crisis None" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Drop the \"Timestamp\" column\n", "df = df.drop(columns=[\"Timestamp\"])\n", "df" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineDescriptionEvent Start Year
0DisastersAtlantic hurricane season2023
0DisastersPacific hurricane season2023
0DisastersPacific typhoon season2023
0DisastersOpioid epidemic0(United States)None
1Economics– global chip shortage2020
1Economics– global energy crisis2021
1Economics– inflation surge2021
1Economics– food crises2022
1EconomicsGreat ResignationNone
1EconomicsArgentine monetary crisisNone
1EconomicsLebanese liquidity crisisNone
1EconomicsPakistani economic crisisNone
1EconomicsSouth African energy crisisNone
1EconomicsSri Lankan economic crisisNone
1EconomicsTurkish economic crisisNone
1EconomicsUnited Kingdom cost-of-living crisisNone
2PoliticsArmenian protestsNone
2PoliticsBelarus–European Union border crisisNone
2PoliticsFrench pension reform unrestNone
2PoliticsHaitian crisisNone
2PoliticsImpeachment inquiry against Joe BidenNone
2PoliticsIsraeli judicial reform protestsNone
2PoliticsLibyan crisisNone
2PoliticsMyanmar protestsNone
2PoliticsNigerien crisisNone
2PoliticsNorth Kosovo crisisNone
2PoliticsPakistan political unrestNone
2PoliticsPeruvian protests and political crisisNone
2PoliticsUkrainian refugee crisisNone
2PoliticsUnited Kingdom railway strikesNone
2PoliticsVenezuelan crisisNone
\n", "
" ], "text/plain": [ " Headline Description Event Start Year\n", "0 Disasters Atlantic hurricane season 2023\n", "0 Disasters Pacific hurricane season 2023\n", "0 Disasters Pacific typhoon season 2023\n", "0 Disasters Opioid epidemic0(United States) None\n", "1 Economics – global chip shortage 2020\n", "1 Economics – global energy crisis 2021\n", "1 Economics – inflation surge 2021\n", "1 Economics – food crises 2022\n", "1 Economics Great Resignation None\n", "1 Economics Argentine monetary crisis None\n", "1 Economics Lebanese liquidity crisis None\n", "1 Economics Pakistani economic crisis None\n", "1 Economics South African energy crisis None\n", "1 Economics Sri Lankan economic crisis None\n", "1 Economics Turkish economic crisis None\n", "1 Economics United Kingdom cost-of-living crisis None\n", "2 Politics Armenian protests None\n", "2 Politics Belarus–European Union border crisis None\n", "2 Politics French pension reform unrest None\n", "2 Politics Haitian crisis None\n", "2 Politics Impeachment inquiry against Joe Biden None\n", "2 Politics Israeli judicial reform protests None\n", "2 Politics Libyan crisis None\n", "2 Politics Myanmar protests None\n", "2 Politics Nigerien crisis None\n", "2 Politics North Kosovo crisis None\n", "2 Politics Pakistan political unrest None\n", "2 Politics Peruvian protests and political crisis None\n", "2 Politics Ukrainian refugee crisis None\n", "2 Politics United Kingdom railway strikes None\n", "2 Politics Venezuelan crisis None" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Remove leading and trailing whitespace from all columns (if any)\n", "df = df.apply(lambda x: x.str.strip() if x.dtype == \"object\" else x)\n", "df" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineDescriptionEvent Start Year
0DisastersAtlantic hurricane season2023
0DisastersPacific hurricane season2023
0DisastersPacific typhoon season2023
0DisastersOpioid epidemic0United StatesNone
1Economicsglobal chip shortage2020
1Economicsglobal energy crisis2021
1Economicsinflation surge2021
1Economicsfood crises2022
1EconomicsGreat ResignationNone
1EconomicsArgentine monetary crisisNone
1EconomicsLebanese liquidity crisisNone
1EconomicsPakistani economic crisisNone
1EconomicsSouth African energy crisisNone
1EconomicsSri Lankan economic crisisNone
1EconomicsTurkish economic crisisNone
1EconomicsUnited Kingdom costofliving crisisNone
2PoliticsArmenian protestsNone
2PoliticsBelarusEuropean Union border crisisNone
2PoliticsFrench pension reform unrestNone
2PoliticsHaitian crisisNone
2PoliticsImpeachment inquiry against Joe BidenNone
2PoliticsIsraeli judicial reform protestsNone
2PoliticsLibyan crisisNone
2PoliticsMyanmar protestsNone
2PoliticsNigerien crisisNone
2PoliticsNorth Kosovo crisisNone
2PoliticsPakistan political unrestNone
2PoliticsPeruvian protests and political crisisNone
2PoliticsUkrainian refugee crisisNone
2PoliticsUnited Kingdom railway strikesNone
2PoliticsVenezuelan crisisNone
\n", "
" ], "text/plain": [ " Headline Description Event Start Year\n", "0 Disasters Atlantic hurricane season 2023\n", "0 Disasters Pacific hurricane season 2023\n", "0 Disasters Pacific typhoon season 2023\n", "0 Disasters Opioid epidemic0United States None\n", "1 Economics global chip shortage 2020\n", "1 Economics global energy crisis 2021\n", "1 Economics inflation surge 2021\n", "1 Economics food crises 2022\n", "1 Economics Great Resignation None\n", "1 Economics Argentine monetary crisis None\n", "1 Economics Lebanese liquidity crisis None\n", "1 Economics Pakistani economic crisis None\n", "1 Economics South African energy crisis None\n", "1 Economics Sri Lankan economic crisis None\n", "1 Economics Turkish economic crisis None\n", "1 Economics United Kingdom costofliving crisis None\n", "2 Politics Armenian protests None\n", "2 Politics BelarusEuropean Union border crisis None\n", "2 Politics French pension reform unrest None\n", "2 Politics Haitian crisis None\n", "2 Politics Impeachment inquiry against Joe Biden None\n", "2 Politics Israeli judicial reform protests None\n", "2 Politics Libyan crisis None\n", "2 Politics Myanmar protests None\n", "2 Politics Nigerien crisis None\n", "2 Politics North Kosovo crisis None\n", "2 Politics Pakistan political unrest None\n", "2 Politics Peruvian protests and political crisis None\n", "2 Politics Ukrainian refugee crisis None\n", "2 Politics United Kingdom railway strikes None\n", "2 Politics Venezuelan crisis None" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Remove special characters from the 'Description' column\n", "df['Description'] = df['Description'].apply(lambda x: re.sub(r'[^A-Za-z0-9\\s]+', '', x))\n", "df" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineDescriptionEvent Start Year
0DisastersAtlantic hurricane season2023
1DisastersPacific hurricane season2023
2DisastersPacific typhoon season2023
3DisastersOpioid epidemic0United StatesNone
4Economicsglobal chip shortage2020
5Economicsglobal energy crisis2021
6Economicsinflation surge2021
7Economicsfood crises2022
8EconomicsGreat ResignationNone
9EconomicsArgentine monetary crisisNone
10EconomicsLebanese liquidity crisisNone
11EconomicsPakistani economic crisisNone
12EconomicsSouth African energy crisisNone
13EconomicsSri Lankan economic crisisNone
14EconomicsTurkish economic crisisNone
15EconomicsUnited Kingdom costofliving crisisNone
16PoliticsArmenian protestsNone
17PoliticsBelarusEuropean Union border crisisNone
18PoliticsFrench pension reform unrestNone
19PoliticsHaitian crisisNone
20PoliticsImpeachment inquiry against Joe BidenNone
21PoliticsIsraeli judicial reform protestsNone
22PoliticsLibyan crisisNone
23PoliticsMyanmar protestsNone
24PoliticsNigerien crisisNone
25PoliticsNorth Kosovo crisisNone
26PoliticsPakistan political unrestNone
27PoliticsPeruvian protests and political crisisNone
28PoliticsUkrainian refugee crisisNone
29PoliticsUnited Kingdom railway strikesNone
30PoliticsVenezuelan crisisNone
\n", "
" ], "text/plain": [ " Headline Description Event Start Year\n", "0 Disasters Atlantic hurricane season 2023\n", "1 Disasters Pacific hurricane season 2023\n", "2 Disasters Pacific typhoon season 2023\n", "3 Disasters Opioid epidemic0United States None\n", "4 Economics global chip shortage 2020\n", "5 Economics global energy crisis 2021\n", "6 Economics inflation surge 2021\n", "7 Economics food crises 2022\n", "8 Economics Great Resignation None\n", "9 Economics Argentine monetary crisis None\n", "10 Economics Lebanese liquidity crisis None\n", "11 Economics Pakistani economic crisis None\n", "12 Economics South African energy crisis None\n", "13 Economics Sri Lankan economic crisis None\n", "14 Economics Turkish economic crisis None\n", "15 Economics United Kingdom costofliving crisis None\n", "16 Politics Armenian protests None\n", "17 Politics BelarusEuropean Union border crisis None\n", "18 Politics French pension reform unrest None\n", "19 Politics Haitian crisis None\n", "20 Politics Impeachment inquiry against Joe Biden None\n", "21 Politics Israeli judicial reform protests None\n", "22 Politics Libyan crisis None\n", "23 Politics Myanmar protests None\n", "24 Politics Nigerien crisis None\n", "25 Politics North Kosovo crisis None\n", "26 Politics Pakistan political unrest None\n", "27 Politics Peruvian protests and political crisis None\n", "28 Politics Ukrainian refugee crisis None\n", "29 Politics United Kingdom railway strikes None\n", "30 Politics Venezuelan crisis None" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Reset the DataFrame index\n", "df = df.reset_index(drop=True)\n", "df" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineDescriptionEvent Start YearEvent Location
0DisastersAtlantic hurricane season2023Atlantic
1DisastersPacific hurricane season2023Pacific
2DisastersPacific typhoon season2023Pacific
3DisastersOpioid epidemic0United StatesNoneUnited States
4Economicsglobal chip shortage2020global
5Economicsglobal energy crisis2021global
6Economicsinflation surge2021inflation
7Economicsfood crises2022food
8EconomicsGreat ResignationNoneGreat
9EconomicsArgentine monetary crisisNoneArgentine
10EconomicsLebanese liquidity crisisNoneLebanese
11EconomicsPakistani economic crisisNonePakistani
12EconomicsSouth African energy crisisNoneSouth African
13EconomicsSri Lankan economic crisisNoneSri Lankan
14EconomicsTurkish economic crisisNoneTurkish
15EconomicsUnited Kingdom costofliving crisisNoneUnited Kingdom
16PoliticsArmenian protestsNoneArmenian
17PoliticsBelarusEuropean Union border crisisNoneBelarusEuropean
18PoliticsFrench pension reform unrestNoneFrench
19PoliticsHaitian crisisNoneHaitian
20PoliticsImpeachment inquiry against Joe BidenNoneImpeachment
21PoliticsIsraeli judicial reform protestsNoneIsraeli
22PoliticsLibyan crisisNoneLibyan
23PoliticsMyanmar protestsNoneMyanmar
24PoliticsNigerien crisisNoneNigerien
25PoliticsNorth Kosovo crisisNoneNorth Kosovo
26PoliticsPakistan political unrestNonePakistan
27PoliticsPeruvian protests and political crisisNonePeruvian
28PoliticsUkrainian refugee crisisNoneUkrainian
29PoliticsUnited Kingdom railway strikesNoneUnited Kingdom
30PoliticsVenezuelan crisisNoneVenezuelan
\n", "
" ], "text/plain": [ " Headline Description Event Start Year \\\n", "0 Disasters Atlantic hurricane season 2023 \n", "1 Disasters Pacific hurricane season 2023 \n", "2 Disasters Pacific typhoon season 2023 \n", "3 Disasters Opioid epidemic0United States None \n", "4 Economics global chip shortage 2020 \n", "5 Economics global energy crisis 2021 \n", "6 Economics inflation surge 2021 \n", "7 Economics food crises 2022 \n", "8 Economics Great Resignation None \n", "9 Economics Argentine monetary crisis None \n", "10 Economics Lebanese liquidity crisis None \n", "11 Economics Pakistani economic crisis None \n", "12 Economics South African energy crisis None \n", "13 Economics Sri Lankan economic crisis None \n", "14 Economics Turkish economic crisis None \n", "15 Economics United Kingdom costofliving crisis None \n", "16 Politics Armenian protests None \n", "17 Politics BelarusEuropean Union border crisis None \n", "18 Politics French pension reform unrest None \n", "19 Politics Haitian crisis None \n", "20 Politics Impeachment inquiry against Joe Biden None \n", "21 Politics Israeli judicial reform protests None \n", "22 Politics Libyan crisis None \n", "23 Politics Myanmar protests None \n", "24 Politics Nigerien crisis None \n", "25 Politics North Kosovo crisis None \n", "26 Politics Pakistan political unrest None \n", "27 Politics Peruvian protests and political crisis None \n", "28 Politics Ukrainian refugee crisis None \n", "29 Politics United Kingdom railway strikes None \n", "30 Politics Venezuelan crisis None \n", "\n", " Event Location \n", "0 Atlantic \n", "1 Pacific \n", "2 Pacific \n", "3 United States \n", "4 global \n", "5 global \n", "6 inflation \n", "7 food \n", "8 Great \n", "9 Argentine \n", "10 Lebanese \n", "11 Pakistani \n", "12 South African \n", "13 Sri Lankan \n", "14 Turkish \n", "15 United Kingdom \n", "16 Armenian \n", "17 BelarusEuropean \n", "18 French \n", "19 Haitian \n", "20 Impeachment \n", "21 Israeli \n", "22 Libyan \n", "23 Myanmar \n", "24 Nigerien \n", "25 North Kosovo \n", "26 Pakistan \n", "27 Peruvian \n", "28 Ukrainian \n", "29 United Kingdom \n", "30 Venezuelan " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Function to extract the event location from the description\n", "def extract_event_location(description):\n", " # Use regular expressions to find the location based on keywords\n", " location_match = re.search(r'(?:United|Sri|South|North)\\s+\\w+|\\b(?:United|Sri|South|North)\\b', description)\n", " \n", " #Note: We have only added 4 options in location_match variable, we can add other words such as: \n", " #'New' for 'New Zealand',\n", " #'Costa' for 'Costa Rica',\n", " #'El' for 'El Salvador'\n", " \n", " if location_match:\n", " return location_match.group(0)\n", " else:\n", " # If none of the keywords are found, fetch the first word from the description\n", " words = description.split()\n", " if words:\n", " return words[0]\n", " else:\n", " return None\n", "\n", "# Apply the extract_event_location function to the 'Description' column\n", "df['Event Location'] = df['Description'].apply(extract_event_location)\n", "\n", "df\n" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineDescriptionEvent Start YearEvent LocationCorrect Location
0DisastersAtlantic hurricane season2023AtlanticAtlantic Ocean
1DisastersPacific hurricane season2023PacificPacific Ocean
2DisastersPacific typhoon season2023PacificPacific Ocean
3DisastersOpioid epidemic0United StatesNoneUnited StatesUnited States
4Economicsglobal chip shortage2020globalglobal
5Economicsglobal energy crisis2021globalglobal
6Economicsinflation surge2021inflationinflation
7Economicsfood crises2022foodfood
8EconomicsGreat ResignationNoneGreatGreat
9EconomicsArgentine monetary crisisNoneArgentineArgentina
10EconomicsLebanese liquidity crisisNoneLebaneseLebanon
11EconomicsPakistani economic crisisNonePakistaniPakistan
12EconomicsSouth African energy crisisNoneSouth AfricanSouth Africa
13EconomicsSri Lankan economic crisisNoneSri LankanSri Lanka
14EconomicsTurkish economic crisisNoneTurkishTurkey
15EconomicsUnited Kingdom costofliving crisisNoneUnited KingdomUnited Kingdom
16PoliticsArmenian protestsNoneArmenianArmenia
17PoliticsBelarusEuropean Union border crisisNoneBelarusEuropeanBelarusEuropean
18PoliticsFrench pension reform unrestNoneFrenchFrance
19PoliticsHaitian crisisNoneHaitianHaiti
20PoliticsImpeachment inquiry against Joe BidenNoneImpeachmentImpeachment
21PoliticsIsraeli judicial reform protestsNoneIsraeliIsrael
22PoliticsLibyan crisisNoneLibyanLibya
23PoliticsMyanmar protestsNoneMyanmarMyanmar
24PoliticsNigerien crisisNoneNigerienNiger
25PoliticsNorth Kosovo crisisNoneNorth KosovoKosovo
26PoliticsPakistan political unrestNonePakistanPakistan
27PoliticsPeruvian protests and political crisisNonePeruvianPeru
28PoliticsUkrainian refugee crisisNoneUkrainianUkraine
29PoliticsUnited Kingdom railway strikesNoneUnited KingdomUnited Kingdom
30PoliticsVenezuelan crisisNoneVenezuelanVenezuela
\n", "
" ], "text/plain": [ " Headline Description Event Start Year \\\n", "0 Disasters Atlantic hurricane season 2023 \n", "1 Disasters Pacific hurricane season 2023 \n", "2 Disasters Pacific typhoon season 2023 \n", "3 Disasters Opioid epidemic0United States None \n", "4 Economics global chip shortage 2020 \n", "5 Economics global energy crisis 2021 \n", "6 Economics inflation surge 2021 \n", "7 Economics food crises 2022 \n", "8 Economics Great Resignation None \n", "9 Economics Argentine monetary crisis None \n", "10 Economics Lebanese liquidity crisis None \n", "11 Economics Pakistani economic crisis None \n", "12 Economics South African energy crisis None \n", "13 Economics Sri Lankan economic crisis None \n", "14 Economics Turkish economic crisis None \n", "15 Economics United Kingdom costofliving crisis None \n", "16 Politics Armenian protests None \n", "17 Politics BelarusEuropean Union border crisis None \n", "18 Politics French pension reform unrest None \n", "19 Politics Haitian crisis None \n", "20 Politics Impeachment inquiry against Joe Biden None \n", "21 Politics Israeli judicial reform protests None \n", "22 Politics Libyan crisis None \n", "23 Politics Myanmar protests None \n", "24 Politics Nigerien crisis None \n", "25 Politics North Kosovo crisis None \n", "26 Politics Pakistan political unrest None \n", "27 Politics Peruvian protests and political crisis None \n", "28 Politics Ukrainian refugee crisis None \n", "29 Politics United Kingdom railway strikes None \n", "30 Politics Venezuelan crisis None \n", "\n", " Event Location Correct Location \n", "0 Atlantic Atlantic Ocean \n", "1 Pacific Pacific Ocean \n", "2 Pacific Pacific Ocean \n", "3 United States United States \n", "4 global global \n", "5 global global \n", "6 inflation inflation \n", "7 food food \n", "8 Great Great \n", "9 Argentine Argentina \n", "10 Lebanese Lebanon \n", "11 Pakistani Pakistan \n", "12 South African South Africa \n", "13 Sri Lankan Sri Lanka \n", "14 Turkish Turkey \n", "15 United Kingdom United Kingdom \n", "16 Armenian Armenia \n", "17 BelarusEuropean BelarusEuropean \n", "18 French France \n", "19 Haitian Haiti \n", "20 Impeachment Impeachment \n", "21 Israeli Israel \n", "22 Libyan Libya \n", "23 Myanmar Myanmar \n", "24 Nigerien Niger \n", "25 North Kosovo Kosovo \n", "26 Pakistan Pakistan \n", "27 Peruvian Peru \n", "28 Ukrainian Ukraine \n", "29 United Kingdom United Kingdom \n", "30 Venezuelan Venezuela " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "location_mapping = {\n", " 'Atlantic' : 'Atlantic Ocean',\n", " 'Pacific' : 'Pacific Ocean',\n", " 'French': 'France',\n", " 'Argentine' : 'Argentina',\n", " 'Lebanese' : 'Lebanon',\n", " 'Pakistani' : 'Pakistan',\n", " 'Peruvian' : 'Peru',\n", " 'Ukrainian' : 'Ukraine',\n", " 'Venezuelan' : 'Venezuela',\n", " 'North Kosovo' : 'Kosovo',\n", " 'Nigerien' : 'Niger',\n", " 'Libyan' : 'Libya',\n", " 'Israeli' : 'Israel',\n", " 'Haitian' : 'Haiti',\n", " 'Armenian': 'Armenia',\n", " 'Turkish' : 'Turkey',\n", " 'South African' : 'South Africa',\n", " 'Sri Lankan' : 'Sri Lanka'\n", "}\n", "\n", "# Function to correct event locations to country names\n", "def correct_event_location(event_location):\n", " return location_mapping.get(event_location, event_location)\n", "\n", "# Apply the correct_event_location function to the 'Event Location' column\n", "df['Correct Location'] = df['Event Location'].apply(correct_event_location)\n", "df\n", "\n" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineDescriptionEvent Start YearEvent LocationCorrect Location
0DisastersAtlantic hurricane season2023AtlanticAtlantic Ocean
1DisastersPacific hurricane season2023PacificPacific Ocean
2DisastersPacific typhoon season2023PacificPacific Ocean
3DisastersOpioid epidemic0United StatesNoneUnited StatesUnited States
4Economicsglobal chip shortage2020globalGlobal
5Economicsglobal energy crisis2021globalGlobal
6Economicsinflation surge2021inflationInflation
7Economicsfood crises2022foodFood
8EconomicsGreat ResignationNoneGreatGreat
9EconomicsArgentine monetary crisisNoneArgentineArgentina
10EconomicsLebanese liquidity crisisNoneLebaneseLebanon
11EconomicsPakistani economic crisisNonePakistaniPakistan
12EconomicsSouth African energy crisisNoneSouth AfricanSouth Africa
13EconomicsSri Lankan economic crisisNoneSri LankanSri Lanka
14EconomicsTurkish economic crisisNoneTurkishTurkey
15EconomicsUnited Kingdom costofliving crisisNoneUnited KingdomUnited Kingdom
16PoliticsArmenian protestsNoneArmenianArmenia
17PoliticsBelarusEuropean Union border crisisNoneBelarusEuropeanBelaruseuropean
18PoliticsFrench pension reform unrestNoneFrenchFrance
19PoliticsHaitian crisisNoneHaitianHaiti
20PoliticsImpeachment inquiry against Joe BidenNoneImpeachmentImpeachment
21PoliticsIsraeli judicial reform protestsNoneIsraeliIsrael
22PoliticsLibyan crisisNoneLibyanLibya
23PoliticsMyanmar protestsNoneMyanmarMyanmar
24PoliticsNigerien crisisNoneNigerienNiger
25PoliticsNorth Kosovo crisisNoneNorth KosovoKosovo
26PoliticsPakistan political unrestNonePakistanPakistan
27PoliticsPeruvian protests and political crisisNonePeruvianPeru
28PoliticsUkrainian refugee crisisNoneUkrainianUkraine
29PoliticsUnited Kingdom railway strikesNoneUnited KingdomUnited Kingdom
30PoliticsVenezuelan crisisNoneVenezuelanVenezuela
\n", "
" ], "text/plain": [ " Headline Description Event Start Year \\\n", "0 Disasters Atlantic hurricane season 2023 \n", "1 Disasters Pacific hurricane season 2023 \n", "2 Disasters Pacific typhoon season 2023 \n", "3 Disasters Opioid epidemic0United States None \n", "4 Economics global chip shortage 2020 \n", "5 Economics global energy crisis 2021 \n", "6 Economics inflation surge 2021 \n", "7 Economics food crises 2022 \n", "8 Economics Great Resignation None \n", "9 Economics Argentine monetary crisis None \n", "10 Economics Lebanese liquidity crisis None \n", "11 Economics Pakistani economic crisis None \n", "12 Economics South African energy crisis None \n", "13 Economics Sri Lankan economic crisis None \n", "14 Economics Turkish economic crisis None \n", "15 Economics United Kingdom costofliving crisis None \n", "16 Politics Armenian protests None \n", "17 Politics BelarusEuropean Union border crisis None \n", "18 Politics French pension reform unrest None \n", "19 Politics Haitian crisis None \n", "20 Politics Impeachment inquiry against Joe Biden None \n", "21 Politics Israeli judicial reform protests None \n", "22 Politics Libyan crisis None \n", "23 Politics Myanmar protests None \n", "24 Politics Nigerien crisis None \n", "25 Politics North Kosovo crisis None \n", "26 Politics Pakistan political unrest None \n", "27 Politics Peruvian protests and political crisis None \n", "28 Politics Ukrainian refugee crisis None \n", "29 Politics United Kingdom railway strikes None \n", "30 Politics Venezuelan crisis None \n", "\n", " Event Location Correct Location \n", "0 Atlantic Atlantic Ocean \n", "1 Pacific Pacific Ocean \n", "2 Pacific Pacific Ocean \n", "3 United States United States \n", "4 global Global \n", "5 global Global \n", "6 inflation Inflation \n", "7 food Food \n", "8 Great Great \n", "9 Argentine Argentina \n", "10 Lebanese Lebanon \n", "11 Pakistani Pakistan \n", "12 South African South Africa \n", "13 Sri Lankan Sri Lanka \n", "14 Turkish Turkey \n", "15 United Kingdom United Kingdom \n", "16 Armenian Armenia \n", "17 BelarusEuropean Belaruseuropean \n", "18 French France \n", "19 Haitian Haiti \n", "20 Impeachment Impeachment \n", "21 Israeli Israel \n", "22 Libyan Libya \n", "23 Myanmar Myanmar \n", "24 Nigerien Niger \n", "25 North Kosovo Kosovo \n", "26 Pakistan Pakistan \n", "27 Peruvian Peru \n", "28 Ukrainian Ukraine \n", "29 United Kingdom United Kingdom \n", "30 Venezuelan Venezuela " ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Convert 'Correct Location' column to sentence case\n", "df['Correct Location'] = df['Correct Location'].apply(lambda x: x.title() if pd.notna(x) else x)\n", "df" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "United Kingdom 2\n", "Global 2\n", "Pakistan 2\n", "Pacific Ocean 2\n", "Belaruseuropean 1\n", "Ukraine 1\n", "Peru 1\n", "Kosovo 1\n", "Niger 1\n", "Myanmar 1\n", "Libya 1\n", "Israel 1\n", "Impeachment 1\n", "Haiti 1\n", "France 1\n", "Atlantic Ocean 1\n", "Armenia 1\n", "Turkey 1\n", "Sri Lanka 1\n", "South Africa 1\n", "Lebanon 1\n", "Argentina 1\n", "Great 1\n", "Food 1\n", "Inflation 1\n", "United States 1\n", "Venezuela 1\n", "Name: Correct Location, dtype: int64" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Discuss the Correct Location values\n", "df['Correct Location'].value_counts(dropna=False)\n", "\n", "#which one are valid values or not?" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineDescriptionEvent Start YearEvent LocationCorrect LocationDuration
0DisastersAtlantic hurricane season2023AtlanticAtlantic Ocean0 year
1DisastersPacific hurricane season2023PacificPacific Ocean0 year
2DisastersPacific typhoon season2023PacificPacific Ocean0 year
3DisastersOpioid epidemic0United StatesNoneUnited StatesUnited StatesNone
4Economicsglobal chip shortage2020globalGlobal3 years
5Economicsglobal energy crisis2021globalGlobal2 years
6Economicsinflation surge2021inflationInflation2 years
7Economicsfood crises2022foodFood1 year
8EconomicsGreat ResignationNoneGreatGreatNone
9EconomicsArgentine monetary crisisNoneArgentineArgentinaNone
10EconomicsLebanese liquidity crisisNoneLebaneseLebanonNone
11EconomicsPakistani economic crisisNonePakistaniPakistanNone
12EconomicsSouth African energy crisisNoneSouth AfricanSouth AfricaNone
13EconomicsSri Lankan economic crisisNoneSri LankanSri LankaNone
14EconomicsTurkish economic crisisNoneTurkishTurkeyNone
15EconomicsUnited Kingdom costofliving crisisNoneUnited KingdomUnited KingdomNone
16PoliticsArmenian protestsNoneArmenianArmeniaNone
17PoliticsBelarusEuropean Union border crisisNoneBelarusEuropeanBelaruseuropeanNone
18PoliticsFrench pension reform unrestNoneFrenchFranceNone
19PoliticsHaitian crisisNoneHaitianHaitiNone
20PoliticsImpeachment inquiry against Joe BidenNoneImpeachmentImpeachmentNone
21PoliticsIsraeli judicial reform protestsNoneIsraeliIsraelNone
22PoliticsLibyan crisisNoneLibyanLibyaNone
23PoliticsMyanmar protestsNoneMyanmarMyanmarNone
24PoliticsNigerien crisisNoneNigerienNigerNone
25PoliticsNorth Kosovo crisisNoneNorth KosovoKosovoNone
26PoliticsPakistan political unrestNonePakistanPakistanNone
27PoliticsPeruvian protests and political crisisNonePeruvianPeruNone
28PoliticsUkrainian refugee crisisNoneUkrainianUkraineNone
29PoliticsUnited Kingdom railway strikesNoneUnited KingdomUnited KingdomNone
30PoliticsVenezuelan crisisNoneVenezuelanVenezuelaNone
\n", "
" ], "text/plain": [ " Headline Description Event Start Year \\\n", "0 Disasters Atlantic hurricane season 2023 \n", "1 Disasters Pacific hurricane season 2023 \n", "2 Disasters Pacific typhoon season 2023 \n", "3 Disasters Opioid epidemic0United States None \n", "4 Economics global chip shortage 2020 \n", "5 Economics global energy crisis 2021 \n", "6 Economics inflation surge 2021 \n", "7 Economics food crises 2022 \n", "8 Economics Great Resignation None \n", "9 Economics Argentine monetary crisis None \n", "10 Economics Lebanese liquidity crisis None \n", "11 Economics Pakistani economic crisis None \n", "12 Economics South African energy crisis None \n", "13 Economics Sri Lankan economic crisis None \n", "14 Economics Turkish economic crisis None \n", "15 Economics United Kingdom costofliving crisis None \n", "16 Politics Armenian protests None \n", "17 Politics BelarusEuropean Union border crisis None \n", "18 Politics French pension reform unrest None \n", "19 Politics Haitian crisis None \n", "20 Politics Impeachment inquiry against Joe Biden None \n", "21 Politics Israeli judicial reform protests None \n", "22 Politics Libyan crisis None \n", "23 Politics Myanmar protests None \n", "24 Politics Nigerien crisis None \n", "25 Politics North Kosovo crisis None \n", "26 Politics Pakistan political unrest None \n", "27 Politics Peruvian protests and political crisis None \n", "28 Politics Ukrainian refugee crisis None \n", "29 Politics United Kingdom railway strikes None \n", "30 Politics Venezuelan crisis None \n", "\n", " Event Location Correct Location Duration \n", "0 Atlantic Atlantic Ocean 0 year \n", "1 Pacific Pacific Ocean 0 year \n", "2 Pacific Pacific Ocean 0 year \n", "3 United States United States None \n", "4 global Global 3 years \n", "5 global Global 2 years \n", "6 inflation Inflation 2 years \n", "7 food Food 1 year \n", "8 Great Great None \n", "9 Argentine Argentina None \n", "10 Lebanese Lebanon None \n", "11 Pakistani Pakistan None \n", "12 South African South Africa None \n", "13 Sri Lankan Sri Lanka None \n", "14 Turkish Turkey None \n", "15 United Kingdom United Kingdom None \n", "16 Armenian Armenia None \n", "17 BelarusEuropean Belaruseuropean None \n", "18 French France None \n", "19 Haitian Haiti None \n", "20 Impeachment Impeachment None \n", "21 Israeli Israel None \n", "22 Libyan Libya None \n", "23 Myanmar Myanmar None \n", "24 Nigerien Niger None \n", "25 North Kosovo Kosovo None \n", "26 Pakistan Pakistan None \n", "27 Peruvian Peru None \n", "28 Ukrainian Ukraine None \n", "29 United Kingdom United Kingdom None \n", "30 Venezuelan Venezuela None " ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Function to calculate duration and format the result\n", "def calculate_duration(event_year):\n", " if pd.notna(event_year):\n", " current_year = datetime.datetime.now().year\n", " duration = current_year - int(event_year)\n", " if duration == 1 or duration == 0:\n", " return f\"{duration} year\"\n", " else:\n", " return f\"{duration} years\"\n", " else:\n", " return None\n", "\n", "# Create a new column 'Duration' using the calculate_duration function\n", "df['Duration'] = df['Event Start Year'].apply(calculate_duration)\n", "df" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HeadlineDescriptionEvent Start YearDurationEvent LocationCorrect Location
0DisastersAtlantic hurricane season20230 yearAtlanticAtlantic Ocean
1DisastersPacific hurricane season20230 yearPacificPacific Ocean
2DisastersPacific typhoon season20230 yearPacificPacific Ocean
3DisastersOpioid epidemic0United StatesNoneNoneUnited StatesUnited States
4Economicsglobal chip shortage20203 yearsglobalGlobal
5Economicsglobal energy crisis20212 yearsglobalGlobal
6Economicsinflation surge20212 yearsinflationInflation
7Economicsfood crises20221 yearfoodFood
8EconomicsGreat ResignationNoneNoneGreatGreat
9EconomicsArgentine monetary crisisNoneNoneArgentineArgentina
10EconomicsLebanese liquidity crisisNoneNoneLebaneseLebanon
11EconomicsPakistani economic crisisNoneNonePakistaniPakistan
12EconomicsSouth African energy crisisNoneNoneSouth AfricanSouth Africa
13EconomicsSri Lankan economic crisisNoneNoneSri LankanSri Lanka
14EconomicsTurkish economic crisisNoneNoneTurkishTurkey
15EconomicsUnited Kingdom costofliving crisisNoneNoneUnited KingdomUnited Kingdom
16PoliticsArmenian protestsNoneNoneArmenianArmenia
17PoliticsBelarusEuropean Union border crisisNoneNoneBelarusEuropeanBelaruseuropean
18PoliticsFrench pension reform unrestNoneNoneFrenchFrance
19PoliticsHaitian crisisNoneNoneHaitianHaiti
20PoliticsImpeachment inquiry against Joe BidenNoneNoneImpeachmentImpeachment
21PoliticsIsraeli judicial reform protestsNoneNoneIsraeliIsrael
22PoliticsLibyan crisisNoneNoneLibyanLibya
23PoliticsMyanmar protestsNoneNoneMyanmarMyanmar
24PoliticsNigerien crisisNoneNoneNigerienNiger
25PoliticsNorth Kosovo crisisNoneNoneNorth KosovoKosovo
26PoliticsPakistan political unrestNoneNonePakistanPakistan
27PoliticsPeruvian protests and political crisisNoneNonePeruvianPeru
28PoliticsUkrainian refugee crisisNoneNoneUkrainianUkraine
29PoliticsUnited Kingdom railway strikesNoneNoneUnited KingdomUnited Kingdom
30PoliticsVenezuelan crisisNoneNoneVenezuelanVenezuela
\n", "
" ], "text/plain": [ " Headline Description Event Start Year \\\n", "0 Disasters Atlantic hurricane season 2023 \n", "1 Disasters Pacific hurricane season 2023 \n", "2 Disasters Pacific typhoon season 2023 \n", "3 Disasters Opioid epidemic0United States None \n", "4 Economics global chip shortage 2020 \n", "5 Economics global energy crisis 2021 \n", "6 Economics inflation surge 2021 \n", "7 Economics food crises 2022 \n", "8 Economics Great Resignation None \n", "9 Economics Argentine monetary crisis None \n", "10 Economics Lebanese liquidity crisis None \n", "11 Economics Pakistani economic crisis None \n", "12 Economics South African energy crisis None \n", "13 Economics Sri Lankan economic crisis None \n", "14 Economics Turkish economic crisis None \n", "15 Economics United Kingdom costofliving crisis None \n", "16 Politics Armenian protests None \n", "17 Politics BelarusEuropean Union border crisis None \n", "18 Politics French pension reform unrest None \n", "19 Politics Haitian crisis None \n", "20 Politics Impeachment inquiry against Joe Biden None \n", "21 Politics Israeli judicial reform protests None \n", "22 Politics Libyan crisis None \n", "23 Politics Myanmar protests None \n", "24 Politics Nigerien crisis None \n", "25 Politics North Kosovo crisis None \n", "26 Politics Pakistan political unrest None \n", "27 Politics Peruvian protests and political crisis None \n", "28 Politics Ukrainian refugee crisis None \n", "29 Politics United Kingdom railway strikes None \n", "30 Politics Venezuelan crisis None \n", "\n", " Duration Event Location Correct Location \n", "0 0 year Atlantic Atlantic Ocean \n", "1 0 year Pacific Pacific Ocean \n", "2 0 year Pacific Pacific Ocean \n", "3 None United States United States \n", "4 3 years global Global \n", "5 2 years global Global \n", "6 2 years inflation Inflation \n", "7 1 year food Food \n", "8 None Great Great \n", "9 None Argentine Argentina \n", "10 None Lebanese Lebanon \n", "11 None Pakistani Pakistan \n", "12 None South African South Africa \n", "13 None Sri Lankan Sri Lanka \n", "14 None Turkish Turkey \n", "15 None United Kingdom United Kingdom \n", "16 None Armenian Armenia \n", "17 None BelarusEuropean Belaruseuropean \n", "18 None French France \n", "19 None Haitian Haiti \n", "20 None Impeachment Impeachment \n", "21 None Israeli Israel \n", "22 None Libyan Libya \n", "23 None Myanmar Myanmar \n", "24 None Nigerien Niger \n", "25 None North Kosovo Kosovo \n", "26 None Pakistan Pakistan \n", "27 None Peruvian Peru \n", "28 None Ukrainian Ukraine \n", "29 None United Kingdom United Kingdom \n", "30 None Venezuelan Venezuela " ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#EXTRA (Just to showcase that the placing of columns can be changed easily)\n", "\n", "# Get the current column names\n", "columns = df.columns.tolist()\n", "\n", "# Move 'Duration' column next to 'Event Year' column\n", "columns.remove('Duration')\n", "columns.insert(columns.index('Event Start Year') + 1, 'Duration')\n", "\n", "# Reorder the columns in the DataFrame\n", "df = df[columns]\n", "df\n", "\n", "#Points to discuss:\n", "# Not all 'Correct Location' values are valid country/city names.\n", "# 'Global', 'Food', 'Inflation', 'Belaruseuropean' : such values requires more in-depth look, not just webscrapping." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }