In [27]:

import os
import pandas
import ggplot
import requests
import matplotlib
%matplotlib inline  
import dateutil.parser
from datetime import datetime
from bs4 import BeautifulSoup
from Pastebin import PastebinAPI

In [2]:

# Brent crude oil data source: http://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=PET&s=RBRTE&f=D
home_path=os.environ['HOME']
crude_csv_file = os.path.join(home_path,"devel/ipython/data/Europe_Brent_Spot_Price_FOB.csv")

In [3]:

lines_to_skip = 5
names = ['day', 'price']
crude_df = pandas.read_csv(crude_csv_file, names=names, skiprows=lines_to_skip)

# Create a datetime field
crude_df['date'] = crude_df.day.map(lambda x: dateutil.parser.parse(x))

# Normalise price with 100% being the peak price
crude_df['crude_normed_price'] =  crude_df.price / max(crude_df.price)


# Re-order the dataframe with the oldest records first
crude_df = crude_df.sort(ascending=False)
crude_df.head()

/Users/mickeymouse/venvs/default/lib/python2.7/site-packages/ipykernel/__main__.py:13: FutureWarning: sort(....) is deprecated, use sort_index(.....)

Out[3]:

	day	price	date	crude_normed_price
7273	05/20/1987	18.63	1987-05-20	0.129420
7272	05/21/1987	18.45	1987-05-21	0.128170
7271	05/22/1987	18.55	1987-05-22	0.128864
7270	05/25/1987	18.60	1987-05-25	0.129212
7269	05/26/1987	18.63	1987-05-26	0.129420

In [4]:

crude_df[['date', 'price']].plot()

Out[4]:

<matplotlib.axes._subplots.AxesSubplot at 0x115a902d0>

In [48]:

g = ggplot.ggplot(crude_df, ggplot.aes('date', 'price')) + ggplot.geom_line()  
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('9 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Brent Crude Price")
g

Out[48]:

<ggplot: (297029425)>

In [49]:

g = ggplot.ggplot(crude_df, ggplot.aes('date', 'crude_normed_price')) + ggplot.geom_line()  
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('9 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Normalized Brent Crude Price")
g

Out[49]:

<ggplot: (297251469)>

In [7]:

# Fetch kenya pump prices from the ERC
# Source: http://www.erc.go.ke/index.php?option=com_content&view=article&id=162&Itemid=666

erc_url = 'http://www.erc.go.ke/index.php?option=com_content&view=article&id=162&Itemid=666'

def get_available_erc_data_dates(erc_data_page_url):
    '''
    The data published by ERC is accessed using an index for a 'month' date_range.
    We need to prefetch all the available date indices.
    '''
    available_dates = []
    resp = requests.get(erc_data_page_url)
    erc_content = BeautifulSoup(resp.content)
    erc_data_table = erc_content.find(name='div', attrs={'class':"item-page"}).table
    for option in erc_data_table.find('select').findAll():
        available_dates.append({
                'date_range': option.attrs.get('value'), 
                'value': option.text,
                'datetime':dateutil.parser.parse(option.text.split('-')[0]) 
            }
        )
    return available_dates

def fetch_erc_data_for_date_range(erc_data_url, date_range, date_range_datetime):
    '''
    ERC has a form that you post to a custom date range that spans a month's worth of time
    We need to parse the html content and extract the data in html tables
    '''
    resp = requests.post(erc_data_url, data=dict(date_range=date_range, submitP='Search'))
    erc_content = BeautifulSoup(resp.content)
    erc_data_table = erc_content.find(name='div', attrs={'class':"item-page"}).table
    data_rows = erc_data_table.find('table').findAll('tr')
    fields = [header.text for header  in data_rows[0].findAll('th')]

    rows = []
    for row in data_rows[1:]:
        row_dict = dict(zip(fields, [column.text for column in row.findAll('td')]))
        row_dict['date'] = date_range_datetime
        rows.append(row_dict)

    return rows

def get_erc_data_as_dict():
    rows_of_dicts = []
    for available_date in get_available_erc_data_dates(erc_url):
        print "Fetching data for %s" % available_date.get('value')
        rows = fetch_erc_data_for_date_range(erc_url, available_date.get('date_range'),  available_date.get('datetime'))
        rows_of_dicts.extend(rows)
    return rows_of_dicts

def get_erc_data_as_df():
    return pandas.DataFrame(get_erc_data_as_dict())

In [8]:

erc_prices_df = get_erc_data_as_df()

Fetching data for Jan 15th, 2016 - Feb 14th, 2016 
Fetching data for Dec 15th, 2015 - Jan 14th, 2016 
Fetching data for Nov 15th, 2015 - Dec 14th, 2015 
Fetching data for Oct 15th, 2015 - Nov 14th, 2015 
Fetching data for Sep 15th, 2015 - Oct 14th, 2015 
Fetching data for Aug 15th, 2015 - Sep 14th, 2015 
Fetching data for Jul 15th, 2015 - Aug 14th, 2015 
Fetching data for Jun 15th, 2015 - Jul 14th, 2015 
Fetching data for May 15th, 2015 - Jun 14th, 2015 
Fetching data for Apr 15th, 2015 - May 14th, 2015 
Fetching data for Mar 15th, 2015 - Apr 14th, 2015 
Fetching data for Feb 16th, 2015 - Mar 15th, 2015 
Fetching data for Jan 15th, 2015 - Feb 14th, 2015 
Fetching data for Dec 15th, 2014 - Jan 14th, 2015 
Fetching data for Nov 15th, 2014 - Dec 14th, 2014 
Fetching data for Oct 15th, 2014 - Nov 14th, 2014 
Fetching data for Sep 15th, 2014 - Oct 15th, 2014 
Fetching data for Aug 15th, 2014 - Sep 14th, 2014 
Fetching data for Jul 15th, 2014 - Aug 14th, 2014 
Fetching data for Jun 15th, 2014 - Jul 15th, 2014 
Fetching data for May 15th, 2014 - Jun 15th, 2014 
Fetching data for Apr 15th, 2014 - May 14th, 2014 
Fetching data for Mar 15th, 2014 - Apr 14th, 2014 
Fetching data for Feb 15th, 2014 - Mar 14th, 2014 
Fetching data for Jan 15th, 2014 - Feb 14th, 2014 
Fetching data for Dec 15th, 2013 - Jan 14th, 2014 
Fetching data for Nov 15th, 2013 - Dec 14th, 2013 
Fetching data for Oct 15th, 2013 - Nov 14th, 2013 
Fetching data for Sep 15th, 2013 - Oct 14th, 2013 
Fetching data for Aug 15th, 2013 - Sep 14th, 2013 
Fetching data for Jul 15th, 2013 - Aug 14th, 2013 
Fetching data for Jun 15th, 2013 - Jul 14th, 2013 
Fetching data for May 15th, 2013 - Jun 14th, 2013 
Fetching data for Apr 15th, 2013 - May 14th, 2013 
Fetching data for Mar 15th, 2013 - Apr 14th, 2013 
Fetching data for Feb 15th, 2013 - Mar 14th, 2013

/Users/mickeymouse/venvs/default/lib/python2.7/site-packages/bs4/__init__.py:166: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("lxml"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.

To get rid of this warning, change this:

 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))

In [32]:

# Cached at: http://pastebin.com/Z8vqDCgr
# Loadable data: http://pastebin.com/raw/Z8vqDCgr
# data_url =  PastebinAPI().paste(pastebin_api_key, erc_prices_df.to_csv(), paste_name="ERC Kenya Petroleum prices", paste_format="text")
# print data_url

http://pastebin.com/Z8vqDCgr

In [9]:

erc_prices_df['super_normed_price'] =  erc_prices_df.Super.astype(float) / max(erc_prices_df.Super.astype(float))
zero_idx = erc_prices_df.super_normed_price != 0
erc_prices_df_clean = erc_prices_df[zero_idx]
erc_prices_df_clean.head()

Out[9]:

	Diesel	Kerosene	Regular	Super	Town	date	super_normed_price
0	79.55	48.93	108.03	91.31	Bomet	2016-01-15	0.700069
1	79.14	48.32	107.58	90.9	Bondo	2016-01-15	0.696926
2	79.55	48.8	108.01	91.3	Bungoma	2016-01-15	0.699992
3	80.14	49.32	108.48	91.9	Busia	2016-01-15	0.704593
4	78.44	47.87	107.34	90.38	Chuka	2016-01-15	0.692939

In [10]:

erc_prices_nairobi_df = erc_prices_df_clean[erc_prices_df.Town=='Nairobi']
erc_prices_nairobi_df.head()

/Users/mickeymouse/venvs/default/lib/python2.7/site-packages/pandas/core/frame.py:1997: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)

Out[10]:

	Diesel	Kerosene	Regular	Super	Town	date	super_normed_price
50	76.7	46.13	105.76	88.64	Nairobi	2016-01-15	0.679598
120	78.51	53.27	105.76	90.06	Nairobi	2015-12-15	0.690485
190	79.66	54.81	105.76	90.46	Nairobi	2015-11-15	0.693552
260	82.43	56.04	105.76	93.29	Nairobi	2015-10-15	0.715250
330	79.99	52.55	0	102.65	Nairobi	2015-09-15	0.787012

In [46]:

g = ggplot.ggplot(erc_prices_nairobi_df, ggplot.aes('date', 'Super')) + ggplot.geom_line()  
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Nairobi Super Pump Price")
g

Out[46]:

<ggplot: (296571333)>

In [35]:

min_erc_date = min(erc_prices_nairobi_df.date)
erc_crude_date_intersection_df = crude_df[crude_df.date > min_erc_date]

# Renorm the data
erc_crude_date_intersection_df['crude_normed_price'] =  erc_crude_date_intersection_df.price / max(erc_crude_date_intersection_df.price)
# Renorm this field for the subsetted dataframe
erc_prices_nairobi_df['super_normed_price'] =  erc_prices_nairobi_df.super_normed_price / max(erc_prices_nairobi_df.super_normed_price)

/Users/mickeymouse/venvs/default/lib/python2.7/site-packages/ipykernel/__main__.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/Users/mickeymouse/venvs/default/lib/python2.7/site-packages/ipykernel/__main__.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [44]:

# So let's compare the normalised crude oil purchase price to the normalised Nairobi Pump price
# Ideally, these graphs should be identical!
# If the oligopolies argue that they are a few months late in tracking the global crude prices,
#  we should see the red dot graph slightly right shifted over the blue one.
g = ggplot.ggplot(erc_crude_date_intersection_df, ggplot.aes('date', 'crude_normed_price')) + ggplot.geom_line(colour="green") 
g = g + ggplot.geom_point(ggplot.aes(x='date', y='super_normed_price'), data=erc_prices_nairobi_df,  color="red")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("% drop in Brent Crude Cost vs Pump Prices in Kenya")
g

Out[44]:

<ggplot: (295938557)>

In [43]:

# Let's plot the difference between the normalised crude oil price and the pump price,
# If profits margins are the same, we should see a straight line here, otherwise,
#  - if the graph goes up with time, we are getting shafted.
#  - if the graph goes down with time, we are getting a good deal.

# We have to renormalize again because merging may drop some rows.
merged_df = pandas.merge(erc_crude_date_intersection_df, erc_prices_nairobi_df, how='inner', on=['date', 'date'])
merged_df['normalized_margins'] = merged_df.super_normed_price / merged_df.crude_normed_price
merged_df['super_normed_price'] =  merged_df.Super.astype(float) / max(merged_df.Super.astype(float))
merged_df['crude_normed_price'] =  merged_df.price / max(merged_df.price)



g = ggplot.ggplot(merged_df, ggplot.aes('date', 'normalized_margins')) + ggplot.geom_line(colour="red") 
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Estimated Profit Margins based on Crude Prices vs Pump prices for Kenya")
g

Out[43]:

<ggplot: (295937509)>

In [15]:

# As you can see from the graph above, we are thoroughly getting shafted.
# 1. 'Super' petrol margins have gone up by >200% over the past 2 years.
# 2. It's rather obvious that petrol companies in Kenya are not tracking global prices. 
#   If anything, they are keeping prices artificially high and pocketing the change
# In other words, Petrol companies are making 2-3 times the profit they made in 2013

In [16]:

# So the arguement might be that the Kenyan currency has lost value.
# We can work around this by converting the KES pump price to the equivalent USD value
# at the time by using historical data from CBK.

In [17]:

# Let's fetch Central bank of Kenya Forex data
# Source https://www.centralbank.go.ke/index.php/interest-rates/commercial-banks-weighted-av

cbk_data_url = 'https://www.centralbank.go.ke/index.php/interest-rates/commercial-banks-weighted-av' 


def fetch_cbk_data_as_dicts(cbk_data_url):
    resp = requests.post(cbk_data_url, 
            data=dict(
                # Start date
                date=22, month=1, year=2007, 
                # End date
                tdate=22, tmonth=1, tyear=2016, 
                currency='US DOLLAR', searchForex='Search'),
            # CBK has a braindead policy of blocking non-ui browser agents
            headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:43.0) Gecko/20100101 Firefox/43.0'})

    cbk_content = BeautifulSoup(resp.content)
    cbk_data_table_header = cbk_content.find(name='div', attrs={'id':"interbank"}).table.find('tr', attrs={'class': 'tr_heading'})
    headers = [td.text for td in cbk_data_table_header.findAll('td')]
    rows_of_dicts = []

    for table_row in cbk_data_table_header.fetchNextSiblings():
        row = [td.text for td in table_row.findAll('td')]
        rows_of_dicts.append(dict(zip(headers, row)))
    return rows_of_dicts

def fetch_cbk_data_as_df(cbk_data_url):
    return pandas.DataFrame(fetch_cbk_data_as_dicts(cbk_data_url))

In [18]:

cbk_usd_df = fetch_cbk_data_as_df(cbk_data_url)
cbk_usd_df['date'] = cbk_usd_df.Date.map(lambda x: dateutil.parser.parse(x, dayfirst=True))
cbk_usd_df = cbk_usd_df.sort(['date'], ascending=True)
cbk_usd_df.head(15)

/Users/mickeymouse/venvs/default/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  app.launch_new_instance()

Out[18]:

	Buy	Currency	Date	Mean	Sell	date
76	69.8256	US DOLLAR	22-01-2007	69.8978	69.9700	2007-01-22
77	70.0011	US DOLLAR	23-01-2007	70.0733	70.1456	2007-01-23
78	70.0711	US DOLLAR	24-01-2007	70.1433	70.2156	2007-01-24
79	70.2300	US DOLLAR	25-01-2007	70.2994	70.3689	2007-01-25
80	70.4411	US DOLLAR	26-01-2007	70.5056	70.5700	2007-01-26
81	70.5533	US DOLLAR	29-01-2007	70.6256	70.6978	2007-01-29
82	70.4956	US DOLLAR	30-01-2007	70.5567	70.6178	2007-01-30
83	70.4756	US DOLLAR	31-01-2007	70.5367	70.5978	2007-01-31
84	70.3167	US DOLLAR	01-02-2007	70.3722	70.4278	2007-02-01
85	70.2000	US DOLLAR	02-02-2007	70.2722	70.3444	2007-02-02
86	70.1411	US DOLLAR	05-02-2007	70.2078	70.2744	2007-02-05
87	69.9544	US DOLLAR	06-02-2007	70.0211	70.0878	2007-02-06
88	69.9033	US DOLLAR	07-02-2007	69.9700	70.0367	2007-02-07
89	69.6667	US DOLLAR	08-02-2007	69.7389	69.8111	2007-02-08
90	69.6533	US DOLLAR	09-02-2007	69.7256	69.7978	2007-02-09

In [39]:

# Cached here: http://pastebin.com/345FHtWH
# data_url =  PastebinAPI().paste(pastebin_api_key, cbk_usd_df.to_csv(), paste_name="CBK Kenya USD Forex rates", paste_format="text")
# print data_url

http://pastebin.com/345FHtWH

In [42]:

# Let's see what the USD / KES rate looks like
g = ggplot.ggplot(cbk_usd_df, ggplot.aes('date', 'Buy')) + ggplot.geom_line(colour="red") 
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('3 month'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Kenya USD Forex Rate")
g

Out[42]:

<ggplot: (295776609)>

In [86]:

merged_df_with_usd = pandas.merge(merged_df, cbk_usd_df, how='inner', on=['date', 'date'])
# Convert the super price to USD
merged_df_with_usd['super_as_usd'] = merged_df_with_usd.Super.astype(float) / merged_df_with_usd.Mean.astype(float)

# Normalise the USD super price
merged_df_with_usd['normalised_super_as_usd'] = merged_df_with_usd.super_as_usd / max(merged_df_with_usd.super_as_usd) 


# And the USD Margins
merged_df_with_usd['normalized_margins_usd'] = merged_df_with_usd.normalised_super_as_usd / merged_df_with_usd.crude_normed_price


merged_df_with_usd.head()

Out[86]:

	day	price	date	crude_normed_price	Diesel	Kerosene	Regular	Super	Town	super_normed_price	normalized_margins	Buy	Currency	Date	Mean	Sell	super_as_usd	normalised_super_as_usd	normalized_margins_usd
0	03/15/2013	109.32	2013-03-15	0.979745	107.37	88.54	105.76	117.69	Nairobi	1.000000	1.071625	85.5389	US DOLLAR	15-03-2013	85.6361	85.7333	1.374304	1.000000	1.020673
1	05/15/2013	101.57	2013-05-15	0.910289	101.06	79.46	105.76	113.24	Nairobi	0.962189	1.109781	83.7222	US DOLLAR	15-05-2013	83.8069	83.8917	1.351201	0.983190	1.080086
2	07/15/2013	109.05	2013-07-15	0.977326	102.86	79.49	105.76	109.52	Nairobi	0.930580	0.999702	87.1344	US DOLLAR	15-07-2013	87.2364	87.3383	1.255439	0.913509	0.934703
3	08/15/2013	111.58	2013-08-15	1.000000	104.44	83.93	105.76	112.26	Nairobi	0.953862	1.001478	87.5044	US DOLLAR	15-08-2013	87.5906	87.6767	1.281644	0.932577	0.932577
4	10/15/2013	110.67	2013-10-15	0.991844	104.47	85.01	105.76	112.27	Nairobi	0.953947	1.009803	84.8944	US DOLLAR	15-10-2013	84.9944	85.0944	1.320911	0.961149	0.969052

In [41]:

# How does the USD price of the pump prices in Kenya look like?
g = ggplot.ggplot(merged_df_with_usd, ggplot.aes('date', 'super_as_usd')) + ggplot.geom_line(colour="green") 
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("Kenya Super Price (USD)")
g

Out[41]:

<ggplot: (295167589)>

In [51]:

# So let's compare how all three normalised prices.
# If there's a huge departured between the normalised_USD pump price and the KES pump price in the graph
# below, we can the ascribe the margin deltas to the devaluation of KES. 
# If there change between the two graphs is marginal, then we can infer that the devaluation arguement is moot
g = ggplot.ggplot(merged_df_with_usd, ggplot.aes('date', 'crude_normed_price')) + ggplot.geom_line(colour="green") 
g = g + ggplot.geom_point(ggplot.aes(x='date', y='super_normed_price'), data=merged_df_with_usd,  color="red")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.geom_point(ggplot.aes(x='date', y='normalised_super_as_usd'), data=merged_df_with_usd,  color="blue")
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("% Price Drop of KES Kenya Pump Price (Red dot) vs USD Kenya Pump Price (Blue dot) vs Crude Price (Green line)")
g

Out[51]:

<ggplot: (297819617)>

In [62]:

# US Gas prices. Source: https://ycharts.com/indicators/gas_price
us_gas_price_url = 'https://ycharts.com/charts/fund_data/json?securities=type%3Aindicator%2Cid%3AI%3AUSRGP%2Cinclude%3Atrue%2C%2C&calcs=id%3Aprice%2Cinclude%3Atrue%2C%2C&correlations=&format=real&recessions=false&zoom=5&startDate=&endDate=&chartView=&splitType=&scaleType=&note=&title=&source=&units=&quoteLegend=&partner=&quotes=&legendOnChart=&securitylistSecurityId=&clientGroupLogoUrl=&maxPoints=650&' 

def get_us_gas_prices(us_gas_price_url):
    resp = requests.get(us_gas_price_url)
    us_gas_prices_json = resp.json()
    return [{'date': datetime.fromtimestamp(row[0] / 1000), 'gallon_price': row[1]} 
                for row in us_gas_prices_json['chart_data'][0][0]['raw_data']]
us_gas_prices = get_us_gas_prices(us_gas_price_url)
us_gas_prices_df = pandas.DataFrame(us_gas_prices)

In [52]:

# Cached here: http://pastebin.com/Jda1iAAT
# data_url =  PastebinAPI().paste(pastebin_api_key, us_gas_prices_df.to_csv(), paste_name="US Gas Cost from ycharts", paste_format="text")
# print data_url

http://pastebin.com/Jda1iAAT

In [64]:

g = ggplot.ggplot(us_gas_prices_df, ggplot.aes('date', 'gallon_price')) + ggplot.geom_line(colour="red")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("US Pump Price per Gallon")
g

Out[64]:

<ggplot: (298367705)>

In [77]:

# Normalized and compared to KE & crude prices 
min_erc_date = min(erc_prices_nairobi_df.date)
erc_us_gas_prices_df_intersection_df = us_gas_prices_df[us_gas_prices_df.date > min_erc_date.date()]

erc_us_gas_prices_df_intersection_df['normalized_gallon_price'] = erc_us_gas_prices_df_intersection_df.gallon_price / max(erc_us_gas_prices_df_intersection_df.gallon_price)



g = ggplot.ggplot(merged_df_with_usd, ggplot.aes('date', 'crude_normed_price')) + ggplot.geom_line(colour="green") 
g = g + ggplot.geom_point(ggplot.aes(x='date', y='super_normed_price'), data=merged_df_with_usd,  color="red")
g = g + ggplot.geom_point(ggplot.aes(x='date', y='normalized_gallon_price'), data=erc_us_gas_prices_df_intersection_df,  color="brown")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.geom_point(ggplot.aes(x='date', y='normalised_super_as_usd'), data=merged_df_with_usd,  color="blue")
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90)) + ggplot.ggtitle("% Price Drop of KES Kenya Pump Price (Red dot) vs USD Kenya Pump Price (Blue dot) \n vs Crude Price (Green line) vs US Pump Prices(Brown)")
g

/Users/mickeymouse/venvs/default/lib/python2.7/site-packages/ipykernel/__main__.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

Out[77]:

<ggplot: (298598673)>

In [87]:

# Granted that the KES has devalued in th recent past, there devalution has changed the USD pump price by at most 10%
# If we plot the margins, we should still see that the Oilers are making significantly more in profits

g = ggplot.ggplot(merged_df_with_usd, ggplot.aes('date', 'normalized_margins')) + ggplot.geom_line(colour="red")
g = g + ggplot.geom_point(ggplot.aes(x='date', y='normalized_margins_usd'), data=merged_df_with_usd,  color="red")
g = g + ggplot.scale_x_date(labels = ggplot.date_format("%Y-%m-%d"), breaks=ggplot.date_breaks('1 months'))
g = g + ggplot.theme(axis_text_x = ggplot.element_text(angle = 90))
g

Out[87]:

<ggplot: (300572801)>

In [96]:

# The points above still show that exchange rate impact is minimal to profit margins.
# If the oilers are to get the same profits as 2014-10-01 (~1.4 over the Brent crude as opposed to the current ~3.0),
# then the pump prices should be current price * 1.4 / 3.0
#In other words, we should be paying
todays_row = merged_df_with_usd.tail(1)
todays_row
fair_price = todays_row['crude_normed_price'].astype('float') * max(merged_df_with_usd['super_as_usd']).astype('float') * todays_row['Mean'].astype('float') 
print "Fair pump price (KES): ",fair_price.values[0]
# Yep the figure is 36.30 Shillings.

# As USD
fair_price_usd = fair_price.values[0] / todays_row['Mean'].astype(float)
print "Fair pump price (USD): ", fair_price_usd.values[0]

# According to this: http://www.thenakedscientists.com/forum/index.php?topic=42516.0
# 1 barrel of oil ~ 42 us gallons
barrel_in_liters = 3.79 * 42

# Total cost of raw crude per liter in KES 
raw_crude_cost_per_liter = todays_row['Mean'].astype(float) * todays_row['price'] / barrel_in_liters
print "Raw crude cost per liter (KES): ", raw_crude_cost_per_liter.values[0]

print "Cost of distillation + profit as a function of the raw product per liter (Unfair price):",  (todays_row['Super'].astype('float') - raw_crude_cost_per_liter.values[0]).values[0]
print "Cost of distillation + profit as a function of the raw product per liter (Fair price):",  (fair_price - raw_crude_cost_per_liter.values[0]).values[0] 
print "Oilers extra profit per liter (over and above 2013 profit margins): ", (todays_row['Super'].astype('float') - fair_price).values[0]

Fair pump price (KES):  36.3067410612
Fair pump price (USD):  0.354722562333
Raw crude cost per liter (KES):  18.5183565775
Cost of distillation + profit as a function of the raw product per liter (Unfair price): 70.1216434225
Cost of distillation + profit as a function of the raw product per liter (Fair price): 17.7883844837
Oilers extra profit per liter (over and above 2013 profit margins):  52.3332589388

In [97]:

# Summary
# Sources: https://ycharts.com/indicators/gas_price
# 1. US Gas (Petrol) prices were 3.764 in Mar 18 2013, 
# 2. US Gas prices are now 2.02 in Jan 18 2016
# 3. Total percentage change: 46% drop in price: -46.3336875664187
print "Drop in US pump price:", (2.02 - 3.764) * 100 / 3.764

# 4. KE GAS prices in Mar 15 2013 was: 117.69
# 5. KE Gas Prices are now: 88.64
# 6. Total percentage change: 24% drop in price.
print "Drop in KE pump price:", (88.64 - 117.69) * 100 / 117.69

# Let's take care of devaluation
# 4. KE USD GAS prices in Mar 15 2013 was: 1.374304
# 5. KE USD Gas Prices are now: 0.866027
# 6. Total percentage change: 24% drop in price.
print "Drop in KE (USD) pump price:", (0.866027 - 1.374304) * 100 / 1.374304

# If we were to pay the US rate, this is the pump price that we should pay (inclusive of currency devaluation):
print "Pump price if we were to drop our prices to the same level as the US...: ", 46.3336875664/ 100 * 1.374304 * 102.3525
#(US % Drop) * Price in Mar 2013 as USD * Current USD/KES Forex rate

Drop in US pump price: -46.3336875664
Drop in KE pump price: -24.683490526
Drop in KE (USD) pump price: -36.9843207907
Pump price if we were to drop our prices to the same level as the US...:  65.1745635173

In [ ]:

Infinite diversity in infinite combinations.

Monday, August 7, 2017

De-obfuscate videos from pluralsight

Tuesday, January 26, 2016

Cost of Petrol in Kenya

Blog Archive

Labels