# (Document-viewer residue: "You are on page 1 of 3")

print("Step 01: Importing Required libraries")

# Third-party libraries, one import per line, in the same order as before:
# pandas/numpy for data handling, psycopg2 for the database connection,
# pygsheets for the Google Sheets delivery step.
import pandas as pd
import numpy as np
import psycopg2
import pygsheets

# Standard-library date helpers.
from datetime import datetime, timedelta

print("Step 02: Connecting to data source")

# Open the database connection that the reporting query runs against.
# The DSN is written as adjacent string literals inside the call parentheses
# so it can span several source lines without breaking the quoted literal.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a secrets store instead.
con = psycopg2.connect(
    "dbname='supplydemand' port='5439' user='training' "
    "password='Training#01' host='redshiftprod.bold.com'"
)

print("Step 03: Querying to data source")

# Pull the funnel metrics grouped by month, portal, traffic medium and device
# for events dated 2018-01-01 or later. Visit/registration/subscription counts
# and the three revenue measures are summed per group.
# read_sql_query already returns a DataFrame, so no extra pd.DataFrame(...)
# wrapper is needed around it.
redshiftdata = pd.read_sql_query("""
select to_char(dd.fulldate, 'YYYY-MM') as yearmonth,dp.reportlabel as portal,dm.medianame as
traffic,dua.devicelabel as device,
sum(s.new_visits) as newvisits,sum(returning_visits) as returningvisits,
sum(s.registrations) as registrations,sum(s.new_subscriptions) as newsubscriptions,
sum(s.new_subscription_revenue_usd_vat) as new_subscription_revenue_usd_vat,
sum(s.trial_subscription_revenue_usd_vat) as trial_subscription_revenue_usd_vat,
sum(s.renewal_subscription_revenue_usd_vat) as renewal_subscription_revenue_usd_vat
from edw.fact_subscriptionfunnel_ad s
left join edw.dim_date dd on s.event_datekey=dd.datekey
left join edw.dim_portal dp on s.portalkey=dp.portalkey
left join edw.dim_medium dm on s.event_mediumkey=dm.mediumkey
left join edw.dim_useragent dua on s.event_useragentkey=dua.useragentkey
where dd.fulldate>='2018-01-01'
group by 1,2,3,4
""", con)

print("Step 04: Cleaning exported data")

# Target dtype for every exported column (so that we can do data
# manipulation tasks on them afterwards).
convert_dict = {
    'yearmonth': str,
    'portal': str,
    'traffic': str,
    'device': str,
    'newvisits': int,
    'returningvisits': int,
    'registrations': int,
    'newsubscriptions': int,
    'new_subscription_revenue_usd_vat': float,
    'trial_subscription_revenue_usd_vat': float,
    'renewal_subscription_revenue_usd_vat': float,
}

# Fill missing values of the numeric columns with zero BEFORE casting:
# casting a column that still contains NULL/NaN to int raises, so the fill
# has to happen first. Only the numeric columns are filled; the text columns
# are left untouched and simply cast to str.
numeric_columns = [
    column for column, dtype in convert_dict.items() if dtype is not str
]
redshiftdata = redshiftdata.fillna(dict.fromkeys(numeric_columns, 0))
redshiftdata = redshiftdata.astype(convert_dict)

print("Step 05: Creating resulting view which is required")

# Total revenue per row = new + trial + renewal subscription revenue.
# The expression is parenthesised so it can legally continue across lines.
redshiftdata['revenue'] = (
    redshiftdata['new_subscription_revenue_usd_vat']
    + redshiftdata['trial_subscription_revenue_usd_vat']
    + redshiftdata['renewal_subscription_revenue_usd_vat']
)

print("Step 06: Delivering Report")


## Case 1 Using Excel for Report Delivery
## Case 2 Using Google Sheets for Report Delivery
## Case 3 Using PDF for Report Delivery
## Case 4 Using Power Point for Report Delivery
## There are many more delivery methods

## Case 2 Using Google Sheets for Report Delivery


# Authorize against Google Sheets with the local client-secrets file.
# NOTE(review): this path was wrapped in the middle of the string literal in
# the source; it is rejoined here with a single space ("... BOLD Use Cases/...")
# - confirm this matches the real folder name.
gc = pygsheets.authorize(
    'D:/Python Training/Python Training BOLD Use Cases/Reporting/client_secrets.json',
    no_cache=True,
)
sh = gc.open("Key Performance Measures Report")
wks = sh.worksheet_by_title('Raw Data')

# Wipe the previous export (A2:Z1000000) and write the fresh data starting at
# A2. Row 1 is left alone and the frame is written without its header row or
# index (copy_head=False, copy_index=False).
wks.clear(start='A2', end='Z1000000')
wks.set_dataframe(
    redshiftdata,
    start='A2',
    fit=False,
    copy_index=False,
    copy_head=False,
    escape_formulae=True,
)

## Case 1 Using Excel for Report Delivery


# Let's generate an Excel report with one sheet per pivot table (revenue by month, revenue by portal)

# Learn about API authentication here: https://plot.ly/pandas/getting-started


# Find your api_key here: https://plot.ly/settings/api
# Revenue totals per month and per portal, each as a flat two-column frame
# (grouping key + summed 'revenue') with a default integer index.
momrevenue = redshiftdata.groupby('yearmonth', as_index=False)['revenue'].sum()
portal = redshiftdata.groupby('portal', as_index=False)['revenue'].sum()

# NOTE(review): `plotly.plotly` only exists in plotly releases before 4.0
# (later moved to the separate chart-studio package) - confirm the installed
# version before relying on this import.
import plotly.plotly as py
import plotly.graph_objs as go

# One bar trace for the month-over-month revenue chart:
# x axis = 'yearmonth' column, y axis = 'revenue' column of momrevenue.
monthly_revenue_bar = go.Bar(
    x=momrevenue['yearmonth'],
    y=momrevenue['revenue'],
)
data = [monthly_revenue_bar]

# IPython notebook
# py.iplot(data, filename='pandas-bar-chart')
# Write both aggregates into report.xlsx, one worksheet each.
# The writer is used as a context manager so the workbook is saved and closed
# on exit (ExcelWriter.save() no longer exists in pandas 2.x), and sheet_name
# is passed by keyword because the positional form is deprecated.
with pd.ExcelWriter('report.xlsx') as writer:
    momrevenue.to_excel(writer, sheet_name='Sheet1', index=False)
    portal.to_excel(writer, sheet_name='Sheet2', index=False)

# Let's generate an Excel report using a predefined template, with 1 sheet containing the results of
# the pivot tables
import openpyxl

## Case 3 Using PDF for Report Delivery


# Process - Template Tool (Jinja) -> PDF Conversion Tool -> Final PDF Report
#
# The query result has no 'fulldate' column; the month label is already
# exported as the 'yearmonth' column in 'YYYY-MM' form (the same format that
# strftime('%Y-%m') would produce), so it is copied directly. The 'YearMonth'
# name is kept because it becomes the column header in the rendered table.
redshiftdata['YearMonth'] = redshiftdata['yearmonth']

momrevenue = redshiftdata.groupby(['YearMonth'])['revenue'].sum().to_frame().reset_index()

from jinja2 import Environment, FileSystemLoader

# Look templates up relative to the current directory and load the report
# template from there.
template_loader = FileSystemLoader('.')
env = Environment(loader=template_loader)
template = env.get_template("myreport.html")

# Values handed to the template: the report title and the monthly revenue
# table rendered as an HTML <table>.
template_vars = {
    "title": "Key Performance Metrics Report",
    "table": momrevenue.to_html(),
}
html_out = template.render(template_vars)

from weasyprint import HTML

# Convert the rendered HTML string into the final PDF file.
pdf_document = HTML(string=html_out)
pdf_document.write_pdf("report.pdf")

# (Document-viewer residue: "You might also like")