In [27]:
import os
import pandas
import ggplot
import requests
import matplotlib
%matplotlib inline
import dateutil.parser
from datetime import datetime
from bs4 import BeautifulSoup
from Pastebin import PastebinAPI
In [2]:
# Brent crude oil data source: http://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=RBRTE&f=D
home_path=os.environ['HOME']
crude_csv_file = os.path.join(home_path,"devel/ipython/data/Europe_Brent_Spot_Price_FOB.csv")
In [3]:
lines_to_skip = 5
names = ['day', 'price']
crude_df = pandas.read_csv(crude_csv_file, names=names, skiprows=lines_to_skip)
# Create a datetime field
crude_df['date'] = crude_df.day.map(lambda x: dateutil.parser.parse(x))
# Normalise price with 100% being the peak price
crude_df['crude_normed_price'] = crude_df.price / max(crude_df.price)
# Re-order the dataframe with the oldest records first
crude_df = crude_df.sort(ascending=False)
crude_df.head()
Out[3]:
In [4]:
crude_df[['date', 'price']].plot()
Out[4]:
In [48]:
g = ggplot.ggplot(crude_df, ggplot.aes('date', 'price')) + ggplot.geom_line()
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('9 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Brent Crude Price")
g
Out[48]:
In [49]:
g = ggplot.ggplot(crude_df, ggplot.aes('date', 'crude_normed_price')) + ggplot.geom_line()
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('9 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Normalized Brent Crude Price")
g
Out[49]:
In [7]:
# Fetch kenya pump prices from the ERC
# Source: http://www.erc.go.ke/index.php?option=com_content&view=article&id=162&Itemid=666
erc_url = 'http://www.erc.go.ke/index.php?option=com_content&view=article&id=162&Itemid=666'
def get_available_erc_data_dates(erc_data_page_url):
'''
The data published by ERC is accessed using an index for a 'month' date_range.
We need to prefetch all the available date indices.
'''
available_dates = []
resp = requests.get(erc_data_page_url)
erc_content = BeautifulSoup(resp.content)
erc_data_table = erc_content.find(name='div', attrs={'class':"item-page"}).table
for option in erc_data_table.find('select').findAll():
available_dates.append({
'date_range': option.attrs.get('value'),
'value': option.text,
'datetime':dateutil.parser.parse(option.text.split('-')[0])
}
)
return available_dates
def fetch_erc_data_for_date_range(erc_data_url, date_range, date_range_datetime):
'''
ERC has a form that you post to a custom date range that spans a month's worth of time
We need to parse the html content and extract the data in html tables
'''
resp = requests.post(erc_data_url, data=dict(date_range=date_range, submitP='Search'))
erc_content = BeautifulSoup(resp.content)
erc_data_table = erc_content.find(name='div', attrs={'class':"item-page"}).table
data_rows = erc_data_table.find('table').findAll('tr')
fields = [header.text for header in data_rows[0].findAll('th')]
rows = []
for row in data_rows[1:]:
row_dict = dict(zip(fields, [column.text for column in row.findAll('td')]))
row_dict['date'] = date_range_datetime
rows.append(row_dict)
return rows
def get_erc_data_as_dict():
rows_of_dicts = []
for available_date in get_available_erc_data_dates(erc_url):
print "Fetching data for %s" % available_date.get('value')
rows = fetch_erc_data_for_date_range(erc_url, available_date.get('date_range'), available_date.get('datetime'))
rows_of_dicts.extend(rows)
return rows_of_dicts
def get_erc_data_as_df():
return pandas.DataFrame(get_erc_data_as_dict())
In [8]:
erc_prices_df = get_erc_data_as_df()
In [32]:
# Cached at: http://pastebin.com/Z8vqDCgr
# Loadable data: http://pastebin.com/raw/Z8vqDCgr
# data_url = PastebinAPI().paste(pastebin_api_key, erc_prices_df.to_csv(), paste_name="ERC Kenya Petroleum prices", paste_format="text")
# print data_url
In [9]:
erc_prices_df['super_normed_price'] = erc_prices_df.Super.astype(float) / max(erc_prices_df.Super.astype(float))
zero_idx = erc_prices_df.super_normed_price != 0
erc_prices_df_clean = erc_prices_df[zero_idx]
erc_prices_df_clean.head()
Out[9]:
In [10]:
erc_prices_nairobi_df = erc_prices_df_clean[erc_prices_df.Town=='Nairobi']
erc_prices_nairobi_df.head()
Out[10]:
In [46]:
g = ggplot.ggplot(erc_prices_nairobi_df, ggplot.aes('date', 'Super')) + ggplot.geom_line()
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Nairobi Super Pump Price")
g
Out[46]:
In [35]:
min_erc_date = min(erc_prices_nairobi_df.date)
erc_crude_date_intersection_df = crude_df[crude_df.date > min_erc_date]
# Renorm the data
erc_crude_date_intersection_df['crude_normed_price'] = erc_crude_date_intersection_df.price / max(erc_crude_date_intersection_df.price)
# Renorm this field for the subsetted dataframe
erc_prices_nairobi_df['super_normed_price'] = erc_prices_nairobi_df.super_normed_price / max(erc_prices_nairobi_df.super_normed_price)
In [44]:
# So let's compare the normalised crude oil purchase price to the normalised Nairobi Pump price
# Ideally, these graphs should be identical!
# If the oligopolies argue that they are a few months late in tracking the global crude prices,
# we should see the red dot graph slightly right shifted over the blue one.
g = ggplot.ggplot(erc_crude_date_intersection_df, ggplot.aes('date', 'crude_normed_price')) + ggplot.geom_line(colour="green")
g = g + ggplot.geom_point(ggplot.aes(x='date', y='super_normed_price'), data=erc_prices_nairobi_df, color="red")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("% drop in Brent Crude Cost vs Pump Prices in Kenya")
g
Out[44]:
In [43]:
# Let's plot the difference between the normalised crude oil price and the pump price,
# If profits margins are the same, we should see a straight line here, otherwise,
# - if the graph goes up with time, we are getting shafted.
# - if the graph goes down with time, we are getting a good deal.
# We have to renormalize again because merging may drop some rows.
merged_df = pandas.merge(erc_crude_date_intersection_df, erc_prices_nairobi_df, how='inner', on=['date', 'date'])
merged_df['normalized_margins'] = merged_df.super_normed_price / merged_df.crude_normed_price
merged_df['super_normed_price'] = merged_df.Super.astype(float) / max(merged_df.Super.astype(float))
merged_df['crude_normed_price'] = merged_df.price / max(merged_df.price)
g = ggplot.ggplot(merged_df, ggplot.aes('date', 'normalized_margins')) + ggplot.geom_line(colour="red")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Estimated Profit Margins based on Crude Prices vs Pump prices for Kenya")
g
Out[43]:
In [15]:
# As you can see from the graph above, we are thoroughly getting shafted.
# 1. 'Super' petrol margins have gone up by >200% over the past 2 years.
# 2. It's rather obvious that petrol companies in Kenya are not tracking global prices.
# If anything, they are keeping prices artificially high and pocketing the change
# In other words, Petrol companies are making 2-3 times the profit they made in 2013
In [16]:
# So the arguement might be that the Kenyan currency has lost value.
# We can work around this by converting the KES pump price to the equivalent USD value
# at the time by using historical data from CBK.
In [17]:
# Let's fetch Central bank of Kenya Forex data
# Source https://www.centralbank.go.ke/index.php/interest-rates/commercial-banks-weighted-av
cbk_data_url = 'https://www.centralbank.go.ke/index.php/interest-rates/commercial-banks-weighted-av'
def fetch_cbk_data_as_dicts(cbk_data_url):
resp = requests.post(cbk_data_url,
data=dict(
# Start date
date=22, month=1, year=2007,
# End date
tdate=22, tmonth=1, tyear=2016,
currency='US DOLLAR', searchForex='Search'),
# CBK has a braindead policy of blocking non-ui browser agents
headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:43.0) Gecko/20100101 Firefox/43.0'})
cbk_content = BeautifulSoup(resp.content)
cbk_data_table_header = cbk_content.find(name='div', attrs={'id':"interbank"}).table.find('tr', attrs={'class': 'tr_heading'})
headers = [td.text for td in cbk_data_table_header.findAll('td')]
rows_of_dicts = []
for table_row in cbk_data_table_header.fetchNextSiblings():
row = [td.text for td in table_row.findAll('td')]
rows_of_dicts.append(dict(zip(headers, row)))
return rows_of_dicts
def fetch_cbk_data_as_df(cbk_data_url):
return pandas.DataFrame(fetch_cbk_data_as_dicts(cbk_data_url))
In [18]:
cbk_usd_df = fetch_cbk_data_as_df(cbk_data_url)
cbk_usd_df['date'] = cbk_usd_df.Date.map(lambda x: dateutil.parser.parse(x, dayfirst=True))
cbk_usd_df = cbk_usd_df.sort(['date'], ascending=True)
cbk_usd_df.head(15)
Out[18]:
In [39]:
# Cached here: http://pastebin.com/345FHtWH
# data_url = PastebinAPI().paste(pastebin_api_key, cbk_usd_df.to_csv(), paste_name="CBK Kenya USD Forex rates", paste_format="text")
# print data_url
In [42]:
# Let's see what the USD / KES rate looks like
g = ggplot.ggplot(cbk_usd_df, ggplot.aes('date', 'Buy')) + ggplot.geom_line(colour="red")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('3 month'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Kenya USD Forex Rate")
g
Out[42]:
In [86]:
merged_df_with_usd = pandas.merge(merged_df, cbk_usd_df, how='inner', on=['date', 'date'])
# Convert the super price to USD
merged_df_with_usd['super_as_usd'] = merged_df_with_usd.Super.astype(float) / merged_df_with_usd.Mean.astype(float)
# Normalise the USD super price
merged_df_with_usd['normalised_super_as_usd'] = merged_df_with_usd.super_as_usd / max(merged_df_with_usd.super_as_usd)
# And the USD Margins
merged_df_with_usd['normalized_margins_usd'] = merged_df_with_usd.normalised_super_as_usd / merged_df_with_usd.crude_normed_price
merged_df_with_usd.head()
Out[86]:
In [41]:
# How does the USD price of the pump prices in Kenya look like?
g = ggplot.ggplot(merged_df_with_usd, ggplot.aes('date', 'super_as_usd')) + ggplot.geom_line(colour="green")
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Kenya Super Price (USD)")
g
Out[41]:
In [51]:
# So let's compare how all three normalised prices.
# If there's a huge departured between the normalised_USD pump price and the KES pump price in the graph
# below, we can the ascribe the margin deltas to the devaluation of KES.
# If there change between the two graphs is marginal, then we can infer that the devaluation arguement is moot
g = ggplot.ggplot(merged_df_with_usd, ggplot.aes('date', 'crude_normed_price')) + ggplot.geom_line(colour="green")
g = g + ggplot.geom_point(ggplot.aes(x='date', y='super_normed_price'), data=merged_df_with_usd, color="red")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.geom_point(ggplot.aes(x='date', y='normalised_super_as_usd'), data=merged_df_with_usd, color="blue")
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("% Price Drop of KES Kenya Pump Price (Red dot) vs USD Kenya Pump Price (Blue dot) vs Crude Price (Green line)")
g
Out[51]:
In [62]:
# US Gas prices. Source: https://ycharts.com/indicators/gas_price
us_gas_price_url = 'https://ycharts.com/charts/fund_data/json?securities=type%3Aindicator%2Cid%3AI%3AUSRGP%2Cinclude%3Atrue%2C%2C&calcs=id%3Aprice%2Cinclude%3Atrue%2C%2C&correlations=&format=real&recessions=false&zoom=5&startDate=&endDate=&chartView=&splitType=&scaleType=¬e=&title=&source=&units="eLegend=&partner="es=&legendOnChart=&securitylistSecurityId=&clientGroupLogoUrl=&maxPoints=650&'
def get_us_gas_prices(us_gas_price_url):
resp = requests.get(us_gas_price_url)
us_gas_prices_json = resp.json()
return [{'date': datetime.fromtimestamp(row[0] / 1000), 'gallon_price': row[1]}
for row in us_gas_prices_json['chart_data'][0][0]['raw_data']]
us_gas_prices = get_us_gas_prices(us_gas_price_url)
us_gas_prices_df = pandas.DataFrame(us_gas_prices)
In [52]:
# Cached here: http://pastebin.com/Jda1iAAT
# data_url = PastebinAPI().paste(pastebin_api_key, us_gas_prices_df.to_csv(), paste_name="US Gas Cost from ycharts", paste_format="text")
# print data_url
In [64]:
g = ggplot.ggplot(us_gas_prices_df, ggplot.aes('date', 'gallon_price')) + ggplot.geom_line(colour="red")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("US Pump Price per Gallon")
g
Out[64]:
In [77]:
# Normalized and compared to KE & crude prices
min_erc_date = min(erc_prices_nairobi_df.date)
erc_us_gas_prices_df_intersection_df = us_gas_prices_df[us_gas_prices_df.date > min_erc_date.date()]
erc_us_gas_prices_df_intersection_df['normalized_gallon_price'] = erc_us_gas_prices_df_intersection_df.gallon_price / max(erc_us_gas_prices_df_intersection_df.gallon_price)
g = ggplot.ggplot(merged_df_with_usd, ggplot.aes('date', 'crude_normed_price')) + ggplot.geom_line(colour="green")
g = g + ggplot.geom_point(ggplot.aes(x='date', y='super_normed_price'), data=merged_df_with_usd, color="red")
g = g + ggplot.geom_point(ggplot.aes(x='date', y='normalized_gallon_price'), data=erc_us_gas_prices_df_intersection_df, color="brown")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.geom_point(ggplot.aes(x='date', y='normalised_super_as_usd'), data=merged_df_with_usd, color="blue")
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("% Price Drop of KES Kenya Pump Price (Red dot) vs USD Kenya Pump Price (Blue dot) \n vs Crude Price (Green line) vs US Pump Prices(Brown)")
g
Out[77]:
In [87]:
# Granted that the KES has devalued in th recent past, there devalution has changed the USD pump price by at most 10%
# If we plot the margins, we should still see that the Oilers are making significantly more in profits
g = ggplot.ggplot(merged_df_with_usd, ggplot.aes('date', 'normalized_margins')) + ggplot.geom_line(colour="red")
g = g + ggplot.geom_point(ggplot.aes(x='date', y='normalized_margins_usd'), data=merged_df_with_usd, color="red")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90))
g
Out[87]:
In [96]:
# The points above still show that exchange rate impact is minimal to profit margins.
# If the oilers are to get the same profits as 2014-10-01 (~1.4 over the Brent crude as opposed to the current ~3.0),
# then the pump prices should be current price * 1.4 / 3.0
#In other words, we should be paying
todays_row = merged_df_with_usd.tail(1)
todays_row
fair_price = todays_row['crude_normed_price'].astype('float') * max(merged_df_with_usd['super_as_usd']).astype('float') * todays_row['Mean'].astype('float')
print "Fair pump price (KES): ",fair_price.values[0]
# Yep the figure is 36.30 Shillings.
# As USD
fair_price_usd = fair_price.values[0] / todays_row['Mean'].astype(float)
print "Fair pump price (USD): ", fair_price_usd.values[0]
# According to this: http://www.thenakedscientists.com/forum/index.php?topic=42516.0
# 1 barrel of oil ~ 42 us gallons
barrel_in_liters = 3.79 * 42
# Total cost of raw crude per liter in KES
raw_crude_cost_per_liter = todays_row['Mean'].astype(float) * todays_row['price'] / barrel_in_liters
print "Raw crude cost per liter (KES): ", raw_crude_cost_per_liter.values[0]
print "Cost of distillation + profit as a function of the raw product per liter (Unfair price):", (todays_row['Super'].astype('float') - raw_crude_cost_per_liter.values[0]).values[0]
print "Cost of distillation + profit as a function of the raw product per liter (Fair price):", (fair_price - raw_crude_cost_per_liter.values[0]).values[0]
print "Oilers extra profit per liter (over and above 2013 profit margins): ", (todays_row['Super'].astype('float') - fair_price).values[0]
In [97]:
# Summary
# Sources: https://ycharts.com/indicators/gas_price
# 1. US Gas (Petrol) prices were 3.764 in Mar 18 2013,
# 2. US Gas prices are now 2.02 in Jan 18 2016
# 3. Total percentage change: 46% drop in price: -46.3336875664187
print "Drop in US pump price:", (2.02 - 3.764) * 100 / 3.764
# 4. KE GAS prices in Mar 15 2013 was: 117.69
# 5. KE Gas Prices are now: 88.64
# 6. Total percentage change: 24% drop in price.
print "Drop in KE pump price:", (88.64 - 117.69) * 100 / 117.69
# Let's take care of devaluation
# 4. KE USD GAS prices in Mar 15 2013 was: 1.374304
# 5. KE USD Gas Prices are now: 0.866027
# 6. Total percentage change: 24% drop in price.
print "Drop in KE (USD) pump price:", (0.866027 - 1.374304) * 100 / 1.374304
# If we were to pay the US rate, this is the pump price that we should pay (inclusive of currency devaluation):
print "Pump price if we were to drop our prices to the same level as the US...: ", 46.3336875664/ 100 * 1.374304 * 102.3525
#(US % Drop) * Price in Mar 2013 as USD * Current USD/KES Forex rate
In [ ]: