├── .gitignore
├── LICENSE
├── README.md
├── client_secrets.json
├── credentials.json
├── drivers
│   ├── __init__.py
│   ├── appsflyer.py
│   ├── database.py
│   ├── facebook.py
│   ├── google.py
│   └── performance.py
├── init
│   ├── __init__.py
│   └── init.py
├── main.py
├── pax
│   ├── __init__.py
│   ├── appsflyer.py
│   ├── database.py
│   ├── facebook.py
│   ├── google.py
│   ├── performance.py
│   └── twitter.py
├── requirements.txt
├── staticfiles
│   ├── reports
│   │   ├── performancereportdrivers.csv
│   │   └── performancereportnewapp.csv
│   └── tables
│       ├── custos_appsflyer_newapp.xlsx
│       ├── custos_gmaps.csv
│       ├── media_and_source_by_source_and_campaign_names.csv
│       ├── siglas_pracas.csv
│       └── source_and_campaigns_by_driverchanelid.csv
└── utils
    ├── __init__.py
    ├── apicalls.py
    ├── drive.py
    ├── paths.py
    ├── queries.py
    └── time.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | #IntelliJ
10 | .idea/
11 | 
12 | # macOS
13 | .DS_Store
14 | 
15 | # Distribution / packaging
16 | .Python
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 | 
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 | 
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 | 
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | .hypothesis/
54 | .pytest_cache/
55 | 
56 | # Translations
57 | *.mo
58 | *.pot
59 | 
60 | # Django stuff:
61 | *.log
62 | local_settings.py
63 | db.sqlite3
64 | 
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 | 
69 | # Scrapy stuff:
70 | .scrapy
71 | 
72 | # Sphinx documentation
73 | docs/_build/
74 | 
75 | # PyBuilder
76 | target/
77 | 
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 | 
81 | # pyenv
82 | .python-version
83 | 
84 | # celery beat schedule file
85 | celerybeat-schedule
86 | 
87 | # SageMath parsed files
88 | *.sage.py
89 | 
90 | # Environments
91 | .env
92 | .venv
93 | env/
94 | venv/
95 | ENV/
96 | env.bak/
97 | venv.bak/
98 | 
99 | # Spyder project settings
100 | .spyderproject
101 | .spyproject
102 | 
103 | # Rope project settings
104 | .ropeproject
105 | 
106 | # mkdocs documentation
107 | /site
108 | 
109 | # mypy
110 | .mypy_cache/
111 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2018 Nicolas Leal
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Performance Reports
2 | This was one of the first programs I made for 99. It gets data from day 1 of the current month to D-1 (yesterday).
3 | It is a Python program that extracts locally, as .csv files, all the data the marketing performance team needs from all the platforms:
4 | Google, Facebook, Twitter, AppsFlyer and Adjust (we are currently not collecting Adjust data anymore).
5 | 
6 | ## Initialize
7 | You can change the initialization of the APIs and set credentials and other credential-related settings in init/init.py.
8 | ```
9 | pip install -r /path/to/performance_reports/requirements.txt
10 | ```
11 | 
12 | ## Utils
13 | + ***apicalls.py*** - classes to retrieve data from the services' APIs.
14 | + ***drive.py*** - sends data to and receives data from Google Drive.
15 | + ***paths.py*** - gets the paths for reports and tables.
16 | + ***queries.py*** - holds the queries that fetch data from the database as a dataframe (if a query changes, don't forget
17 | to update the dataframes' column names).
18 | + ***time.py*** - gets the start_date (since when) and end_date (until when) parameters.
19 | 
20 | ## Run
21 | Easy peasy
22 | ```
23 | python main.py
24 | ```
25 | As you might guess, the ***pax*** folder retrieves and runs ETL on passenger data; ***drivers*** does the same for driver data.
26 | 
27 | ## StaticFiles
28 | There are 2 folders: ***tables*** and ***reports***.
29 | ### Tables
30 | Holds secondary data used by the program; it automatically downloads and updates your local files from Google Drive
31 | once you start the program.
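
As a reference for what that sync looks like, here is a minimal sketch of downloading a Drive folder with PyDrive (the library pinned in requirements.txt). It is only an illustration: the folder id, target directory and function name are hypothetical, not the ones utils/drive.py actually uses.

```
# minimal PyDrive download sketch -- folder_id and target_dir are hypothetical
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

def download_tables(folder_id='HYPOTHETICAL_FOLDER_ID', target_dir='staticfiles/tables/'):
    gauth = GoogleAuth()
    gauth.LocalWebserverAuth()  # init/init.py does a more careful cached-credentials dance
    drive = GoogleDrive(gauth)

    # list every file inside the Drive folder and overwrite the stale local copy
    for drive_file in drive.ListFile({'q': "'%s' in parents and trashed=false" % folder_id}).GetList():
        drive_file.GetContentFile(target_dir + drive_file['title'])
```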
32 | 
33 | ### Reports
34 | Holds the reports generated by the program; they are automatically uploaded to Google Drive once the program finishes running.
35 | 
36 | 
--------------------------------------------------------------------------------
/client_secrets.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/client_secrets.json
--------------------------------------------------------------------------------
/credentials.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/credentials.json
--------------------------------------------------------------------------------
/drivers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/drivers/__init__.py
--------------------------------------------------------------------------------
/drivers/appsflyer.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import logging
3 | import pandas
4 | import requests
5 | import datetime
6 | import utils.time
7 | from utils.paths import PATH_CUSTOS_APPSFLYER, PATH_DEFINE_VALUES
8 | from utils.queries import QUERY_DRIVER_APPSFLYER_INSTALLS
9 | from utils.apicalls import GoogleAds, AppsFlyer
10 | from init.init import databseinit
11 | 
12 | 
13 | def appsflyerData():
14 |     appsflyer_data = pandas.DataFrame()
15 |     define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';')
16 |     custos = pandas.read_excel(PATH_CUSTOS_APPSFLYER,
17 |                                delimiter=';', keep_default_na=False,
18 |                                na_values=['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A', 'N/A', '#NA', 'NULL',
19 |                                           'NaN', '-NaN', 'nan', '-nan'])
20 |     custos = custos.replace(r'', numpy.nan, regex=True)
21 | 
22 |     # format dataframes
23 |     custos['End Date'] = pandas.to_datetime(pandas.Series(custos['End Date']), format="%d/%m/%Y")
24 |     custos['Start Date'] = pandas.to_datetime(pandas.Series(custos['Start Date']), format="%d/%m/%Y")
25 | 
26 |     # config startdate and enddate and create a list of dates
27 |     start = datetime.datetime.strptime(utils.time.startdate, "%Y-%m-%d")
28 |     end = datetime.datetime.strptime(utils.time.enddate, "%Y-%m-%d")
29 |     date_list = [start + datetime.timedelta(days=x) for x in range(0, (end - start).days + 1)]
30 |     custos = custos[(custos['End Date'] > start) | (custos['End Date'].isnull())]
31 | 
32 |     appsflyer = AppsFlyer()
33 |     appsflyer = appsflyer.reports(utils.time.startdate, utils.time.enddate,
34 |                                   ['app_driver_android', 'app_driver_ios'],
35 |                                   ['in_app_events_report', 'organic_in_app_events_report'],
36 |                                   ['event_time', 'media_source', 'campaign', 'af_adset', 'event_name'],
37 |                                   ['app_driver_event'])
38 |     appsflyer['Campaign'] = appsflyer['Campaign'].fillna('None')
39 |     appsflyer['Adset'] = appsflyer['Adset'].fillna('None')
40 |     appsflyer['Media Source'] = appsflyer['Media Source'].fillna('NA')
41 |     appsflyer['enviou_todos_docs'] = 1
42 | 
43 |     appsflyer.drop(['Event Name'], inplace=True, axis=1)
44 |     appsflyer['Event Time'] = appsflyer['Event Time'].apply(lambda x: x.split(' ')[0])
45 |     appsflyer = appsflyer.groupby(['Event Time', 'Media Source', 'Campaign', 'Adset', 'os_name']).sum()
46 |     appsflyer = appsflyer.reset_index()
47 |     appsflyer
= appsflyer.rename(columns={ 48 | 'Event Time': 'date', 49 | 'Media Source': 'source', 50 | 'Campaign': 'campaign', 51 | 'Adset': 'adgroup' 52 | }) 53 | 54 | appsflyer = pandas.concat([appsflyer, retrieveAppsflyerInstalls()]) 55 | 56 | google = GoogleAds() 57 | google = google.reportcampaigns() 58 | google.rename( 59 | columns={ 60 | 'Campaign': 'campaign_name', 61 | 'Campaign ID': 'campaign' 62 | }, inplace=True) 63 | 64 | google = google.drop_duplicates(subset=['campaign']) 65 | 66 | appsflyer = pandas.merge(appsflyer, google, how='left', left_on='campaign', right_on='campaign', indicator=True) 67 | appsflyer['campaign'][appsflyer['_merge'] == 'both'] = appsflyer['campaign_name'] 68 | appsflyer.drop(['campaign_name', '_merge'], inplace=True, axis=1) 69 | 70 | 71 | appsflyer['campaign'] = appsflyer['campaign'].fillna('None') 72 | appsflyer['source'] = appsflyer['source'].fillna('NA') 73 | appsflyer['tool'] = 'AppsFlyer' 74 | appsflyer['midia'] = 'nonpaid' 75 | define_values = define_values[define_values['app'] == 'driver'] 76 | for i in ['source', 'midia']: 77 | define_aux = define_values[~define_values[i].isna()] 78 | for rows in [tuple(x) for x in define_aux.values]: 79 | if str(rows[2]) == 'nan': 80 | appsflyer[i][appsflyer['campaign'].str.contains(rows[3], na=False)] = rows[ 81 | 1 if i == 'source' else 0] 82 | else: 83 | appsflyer[i][appsflyer['source'].str.contains(rows[2], na=False)] = rows[ 84 | 1 if i == 'source' else 0] 85 | 86 | appsflyer['campaign'][appsflyer['source'].str.contains('Driver_Acq', na=False)] = \ 87 | appsflyer[ 88 | 'source'] 89 | 90 | appsflyer = appsflyer[['date','midia', 'tool', 'source', 'os_name', 'campaign', 'adgroup', 'installs', 'enviou_todos_docs']] 91 | 92 | appsflyer['date'] = pandas.to_datetime(pandas.Series(appsflyer['date']), format="%Y-%m-%d") 93 | for date in date_list: 94 | # concat everything on the go 95 | appsflyer_data = pandas.concat([appsflyer_data, 96 | pandas.merge( 97 | (appsflyer[appsflyer['date'] == date]), 98 | (custos[((custos['End Date'] >= date) | (custos['End Date'].isnull())) & ( 99 | custos['Start Date'] <= date)]), 100 | how='left', 101 | left_on='campaign', 102 | right_on='campaign')]) 103 | 104 | appsflyer_data['amount_spent'] = 0 105 | appsflyer_data['amount_spent'][appsflyer_data['type'] == 'cpi'] = appsflyer_data['installs'].astype(float) * appsflyer_data['payout'].astype(float) 106 | appsflyer_data['amount_spent'][appsflyer_data['type'] == 'cpl'] = appsflyer_data['enviou_todos_docs'].astype(float) * appsflyer_data['payout'].astype(float) 107 | 108 | appsflyer_data['amount_spent'] = appsflyer_data['amount_spent'].astype(str) 109 | appsflyer_data['amount_spent'][appsflyer_data['amount_spent'] == 'nan'] = '0.0' 110 | appsflyer_data['amount_spent'] = appsflyer_data['amount_spent'].apply(lambda x: str(x.replace('.', ','))) 111 | 112 | appsflyer_data['installs'] = appsflyer_data['installs'].fillna(0) 113 | appsflyer_data['criou_basic'] = 0 114 | appsflyer_data['criou_basic'] = appsflyer_data['installs']*0.36 115 | appsflyer_data['criou_basic'] = appsflyer_data['criou_basic'].apply(lambda x: round(x)) 116 | appsflyer_data['criou_basic'] = appsflyer_data['criou_basic'].astype(int) 117 | 118 | appsflyer_data.drop(['type', 'payout', 'Start Date', 'End Date'], inplace=True, axis=1) 119 | 120 | appsflyer_data = appsflyer_data.sort_values(by='date') 121 | 122 | logging.info("[DRIVER] Appsflyer Updated") 123 | print('Driver - Appsflyer Updated') 124 | 125 | return appsflyer_data.drop_duplicates().reset_index(drop=True) 126 | 127 | def 
retrieveAppsflyerInstalls():
128 |     conn = databseinit()
129 |     curNewUsers = conn.cursor()
130 |     curNewUsers.execute(QUERY_DRIVER_APPSFLYER_INSTALLS)
131 |     col_names = []
132 |     for x in curNewUsers.description:
133 |         col_names.append(x[0])
134 |     newusers = pandas.DataFrame(curNewUsers.fetchall(), columns=col_names)
135 |     return newusers
--------------------------------------------------------------------------------
/drivers/database.py:
--------------------------------------------------------------------------------
1 | from init.init import databseinit as databaseinit
2 | from utils.paths import PATH_TO_DRIVER_CHANNEL_EQUIVALENCE, PATH_DEFINE_VALUES
3 | from utils.queries import QUERY_DRIVER_SIGN_UP_NEWAPP, QUERY_DRIVER_NEW_REGULAR_NEWAPP, QUERY_DRIVER_DFT_NEWAPP
4 | from utils.apicalls import GoogleAds
5 | import unidecode
6 | import pandas
7 | import logging
8 | 
9 | 
10 | def BaseNewApp():
11 |     define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';')
12 | 
13 |     reports = pandas.DataFrame()
14 |     for query in [QUERY_DRIVER_NEW_REGULAR_NEWAPP, QUERY_DRIVER_DFT_NEWAPP, QUERY_DRIVER_SIGN_UP_NEWAPP]:
15 |         aux = runQuery(query)
16 | 
17 |         # first, read the lookup csv so it becomes a dataframe
18 |         driver_channels = pandas.read_csv(
19 |             PATH_TO_DRIVER_CHANNEL_EQUIVALENCE, sep=';')
20 | 
21 |         # then join the tables
22 |         aux = pandas.merge(aux, driver_channels, how='left', left_on='driver_channel', right_on='driver_channel',
23 |                            indicator=True)
24 | 
25 |         # here I first assign the "source_name" field to the "source" field when "driver_channel" exists in both tables.
26 |         # I do the same for the "campaign" field
27 |         aux['source'][aux['_merge'] == 'both'] = aux['source_name']
28 |         aux['campaign'][aux['_merge'] == 'both'] = aux['campaign_name']
29 |         aux.drop(['source_name', 'campaign_name', 'driver_channel', '_merge'], inplace=True, axis=1)
30 | 
31 | 
32 |         reports = pandas.concat([reports, aux])
33 | 
34 |     google = GoogleAds()
35 |     google = google.reportcampaigns()
36 |     google.rename(
37 |         columns={
38 |             'Campaign': 'campaign_name',
39 |             'Campaign ID': 'campaign'
40 |         }, inplace=True)
41 | 
42 |     google = google.drop_duplicates(subset=['campaign'])
43 | 
44 |     reports = pandas.merge(reports, google, how='left', left_on='campaign', right_on='campaign', indicator=True)
45 |     reports['campaign'][reports['_merge'] == 'both'] = reports['campaign_name']
46 |     reports.drop(['campaign_name', '_merge'], inplace=True, axis=1)
47 | 
48 |     reports['region'] = reports['region'].str.upper()
49 | 
50 |     reports['tool'] = 'BaseNewApp'
51 |     reports['midia'] = 'nonpaid'
52 |     define_values = define_values[define_values['app'] == 'driver']
53 |     for i in ['source', 'midia']:
54 |         define_aux = define_values[~define_values[i].isna()]
55 |         for rows in [tuple(x) for x in define_aux.values]:
56 |             if str(rows[2]) == 'nan':
57 |                 reports[i][reports['campaign'].str.contains(rows[3], na=False)] = rows[
58 |                     1 if i == 'source' else 0]
59 |             else:
60 |                 reports[i][reports['source'].str.contains(rows[2], na=False)] = rows[
61 |                     1 if i == 'source' else 0]
62 | 
63 |     reports['date'] = pandas.to_datetime(pandas.Series(reports['date']), format="%Y-%m-%d")
64 |     reports['week'] = reports['date'].dt.week
65 | 
66 |     reports['region'] = reports['region'].apply(lambda x: unidecode.unidecode(x))
67 | 
68 |     reports = reports[['date', 'week', 'tool', 'midia', 'source', 'campaign', 'signups', 'signups_with_migration', 'regulars', 'regulars_with_migration', 'dft', 'dft_with_migration', 'region']]
69 | 
70 |     logging.info("[DRIVER] Database Updated")
71 | print('Driver - Database Updated') 72 | return reports 73 | 74 | 75 | 76 | def runQuery(query): 77 | conn = databaseinit() 78 | cursor = conn.cursor() 79 | cursor.execute(query) 80 | col_names = [] 81 | for x in cursor.description: 82 | col_names.append(x[0]) 83 | data = pandas.DataFrame(cursor.fetchall(), columns=col_names) 84 | return data 85 | 86 | 87 | def no_channelDistribution(data, distribute_by, columns_to_group, filter_by = None, drop_in_distribution = None, secondDataframe = None): 88 | data = data.fillna('None') 89 | if filter_by: 90 | column = 0 91 | value = 0 92 | for columns, values in filter_by.items(): 93 | column = columns 94 | value = values 95 | onlyNA = data[(data[column] == value)] 96 | temponlyValidChannels = data[(data[column] != value)] 97 | if drop_in_distribution: 98 | drop_in_na = drop_in_distribution + [column] 99 | temponlyValidChannels.drop(drop_in_distribution, inplace=True, axis=1) 100 | onlyNA.drop(drop_in_na, inplace=True, axis=1) 101 | else: 102 | drop_in_na = ['campaign'] 103 | else: 104 | 105 | onlyNA = secondDataframe 106 | temponlyValidChannels = data 107 | drop_in_na = ['campaign'] 108 | 109 | onlyNA = onlyNA.groupby([x for x in columns_to_group if x not in drop_in_na]).sum() 110 | onlyNA = onlyNA.reset_index() 111 | temponlyValidChannels = temponlyValidChannels.groupby(columns_to_group).agg( 112 | {[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]: 'sum'}) 113 | temponlyValidChannels = temponlyValidChannels.groupby([x for x in columns_to_group if x not in distribute_by]).apply( 114 | lambda x: x / float(x.sum())) 115 | temponlyValidChannels = temponlyValidChannels.reset_index() 116 | 117 | temponlyValidChannels.rename(columns={[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]: [x for x in list(temponlyValidChannels) if x not in columns_to_group][0]+'_%'}, inplace=True) 118 | temponlyValidChannels = pandas.merge(onlyNA, temponlyValidChannels, how='left', 119 | left_on=[item for item in columns_to_group if item not in distribute_by], 120 | right_on=[item for item in columns_to_group if item not in distribute_by]) 121 | 122 | temponlyValidChannels[[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]] = round(temponlyValidChannels[[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]] * 123 | temponlyValidChannels[[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]+'_%']) 124 | temponlyValidChannels.drop([[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]+'_%'], inplace=True, axis=1) 125 | 126 | return temponlyValidChannels 127 | -------------------------------------------------------------------------------- /drivers/facebook.py: -------------------------------------------------------------------------------- 1 | from utils.apicalls import FacebookAds 2 | import utils.time 3 | import logging 4 | 5 | 6 | def facebookdata(ad_accounts): 7 | facebook = FacebookAds() 8 | facebook = facebook.reports(date_start=utils.time.startdate, date_end=utils.time.enddate, ad_accounts=ad_accounts, extracted=list()) 9 | facebook['midia'] = 'paid' 10 | facebook['source'] = 'facebook' 11 | facebook['os_name'] = 'android' 12 | facebook['network'] = 'Facebook Ads' 13 | 14 | facebook['source'][facebook['adset_name'].str.contains('_INS')] = 'instagram' 15 | facebook['os_name'][facebook['campaign_name'].str.contains('IOS')] = 'ios' 16 | facebook['network'][facebook['adset_name'].str.contains('_INS')] = 'Instagram Installs' 17 | 18 | 
facebook.rename(columns={'date_start': 'date', 'campaign_name': 'campaign', 'adset_name': 'adgroup', 'ad_name': 'creative', 'spend':'amount_spent'}, inplace=True) 19 | facebook['tool'] = 'FaceAds' 20 | facebook = facebook[ 21 | ['date','tool','midia', 'source', 'os_name', 'network', 'campaign', 'adgroup', 'creative', 'amount_spent', 'impressions', 'clicks']] 22 | 23 | logging.info("[DRIVER] Facebook Updated") 24 | print('Driver - Facebook Updated') 25 | return facebook 26 | -------------------------------------------------------------------------------- /drivers/google.py: -------------------------------------------------------------------------------- 1 | from utils.apicalls import GoogleAds 2 | import utils.time 3 | from utils.paths import PATH_DEFINE_VALUES 4 | import pandas 5 | import logging 6 | 7 | 8 | def googlereports(customer_ids): 9 | reports = pandas.DataFrame() 10 | for customer_id in customer_ids: 11 | data = GoogleAds() 12 | data = data.reports(dateStart=utils.time.startdatebase, dateEnd=utils.time.enddatebase, customerId=customer_id) 13 | 14 | data.drop(['Clicks'], inplace=True, axis=1) 15 | # Column creation 16 | data['midia'] = 'paid' 17 | data['source'] = 'Google' 18 | data['network'] = 'Google Search - New Blue' 19 | data['os_name'] = 'android' 20 | 21 | # Convert values based on data on created tables 22 | 23 | define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';') 24 | define_values = define_values[(define_values['app'] == 'driver') & (~define_values['source'].isna()) & 25 | (~define_values['campaign contains'].isna())] 26 | 27 | for rows in [tuple(x) for x in define_values.values]: 28 | data['source'][data['Campaign'].str.contains(rows[3], na=False)] = rows[1] 29 | 30 | data['os_name'][data['Campaign'].str.contains('_iOS')] = 'ios' 31 | data.rename( 32 | columns={'Day': 'date', 33 | 'Campaign': 'campaign', 34 | 'Cost': 'amount_spent', 35 | 'Impressions': 'impressions', 36 | 'Interactions': 'clicks' 37 | }, inplace=True) 38 | data['tool'] = 'Adwords' 39 | reports = pandas.concat([reports, data]) 40 | 41 | reports = reports[['date','tool', 'midia', 'source', 'os_name', 42 | 'network', 'campaign', 'amount_spent', 'impressions', 'clicks']] 43 | 44 | logging.info("[DRIVER] Google Updated") 45 | print('Driver - Google Updated') 46 | return reports 47 | 48 | 49 | -------------------------------------------------------------------------------- /drivers/performance.py: -------------------------------------------------------------------------------- 1 | from utils.paths import PATH_SIGLAS_PRACAS 2 | from drivers.appsflyer import appsflyerData as appsflyerDriversData 3 | from drivers.facebook import facebookdata as facebookdrivers 4 | from drivers.google import googlereports as googledrivers 5 | from drivers.database import BaseNewApp as baseNewApp 6 | import pandas 7 | import unidecode 8 | 9 | 10 | def performanceDrivers(): 11 | performancedrivers = pandas.concat([ 12 | appsflyerDriversData(), 13 | facebookdrivers(['act_2013665502000466', 'act_1550209891679365']), 14 | googledrivers(['619-852-1756']), 15 | baseNewApp()]) 16 | 17 | performancedrivers['week'] = performancedrivers['date'].dt.week 18 | performancedrivers = performancedrivers[['date', 'week', 'tool', 'midia', 'source', 'os_name', 'campaign', 'adgroup', 'creative', 'installs', 'criou_basic', 'enviou_todos_docs', 'amount_spent', 'impressions', 'clicks', 'signups', 'signups_with_migration', 'regulars', 'regulars_with_migration', 'dft', 'dft_with_migration', 'region']] 19 | 20 | 
performancedrivers['campaign'][performancedrivers['source'].str.contains('Driver_Acq', na=False)] = \
21 |         performancedrivers['source']
22 |     performancedrivers['source'][performancedrivers['source'].str.contains('Driver_Acq', na=False)] = \
23 |         performancedrivers['source'].apply(
24 |             lambda x: str(x)[str(x).find('_', 8) + 1:str(x).find('_', 11)] if str(x).count('_') > 2 else 'landing'  # takes the third '_'-separated token (presumably the partner name) from 'Driver_Acq_<partner>_...' sources, else 'landing'
25 |         )
26 | 
27 |     if 'region' in performancedrivers.columns:
28 |         region = pandas.read_csv(PATH_SIGLAS_PRACAS, sep=';')
29 |         region['pracas'] = region['pracas'].str.upper()
30 |         listofregions = [tuple(x) for x in region.values]
31 |         performancedrivers['region'][performancedrivers['region'].isnull()] = 'BR'
32 |         for i in range(0, len(listofregions)):
33 |             performancedrivers['region'][performancedrivers['campaign'].str.contains(listofregions[i][0], na=False)] = \
34 |                 listofregions[i][1]
35 | 
36 |     performancedrivers['region'] = performancedrivers['region'].apply(lambda x: unidecode.unidecode(x) if x is not None else 'BR')
37 | 
38 |     performancedrivers['week'] = performancedrivers['date'].dt.week
39 |     return performancedrivers
40 | 
41 | 
42 | 
43 | 
--------------------------------------------------------------------------------
/init/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/init/__init__.py
--------------------------------------------------------------------------------
/init/init.py:
--------------------------------------------------------------------------------
1 | from googleads import adwords
2 | from facebook_business.api import FacebookAdsApi
3 | from requests_oauthlib import OAuth1
4 | from pydrive.auth import GoogleAuth
5 | import psycopg2
6 | import os
7 | 
8 | 
9 | # Initialization helpers, used to set up each external service
10 | def googledriveinit():
11 |     gauth = GoogleAuth()
12 |     gauth.LoadCredentialsFile('/'.join(os.path.realpath(__file__).replace('\\', '/').split('/')[:-2])
13 |                               + '/' + "credentials.json")
14 |     if gauth.credentials is None:
15 |         gauth.LocalWebserverAuth()
16 |     elif gauth.access_token_expired:
17 |         try:
18 |             gauth.Refresh()
19 |         except Exception as e:
20 |             print('[Exception] - Exception fired on Google Drive Init:' + str(e))
21 |             gauth.LocalWebserverAuth()
22 |     else:
23 |         gauth.Authorize()
24 | 
25 |     gauth.SaveCredentialsFile('/'.join(os.path.realpath(__file__).replace('\\', '/').split('/')[:-2])
26 |                               + '/' + "credentials.json")
27 |     return gauth
28 | 
29 | 
30 | # Initialize Twitter
31 | def twitterinit():
32 |     consumer_key = 'consumer_key'
33 |     consumer_secret = 'consumer_secret'
34 |     access_token = 'access_token'
35 |     access_token_secret = 'access_token_secret'
36 | 
37 |     auth = OAuth1(consumer_key, consumer_secret,
38 |                   access_token, access_token_secret)
39 |     return auth
40 | 
41 | 
42 | # Initialize Facebook
43 | def facebookinit():
44 |     my_app_id = 'app_id'
45 |     my_app_secret = 'app_secret'
46 |     my_access_token = 'access_token'
47 | 
48 |     FacebookAdsApi.init(my_app_id, my_app_secret, my_access_token)
49 | 
50 | 
51 | # Initialize Adwords
52 | # the client_customer_id is set to the drivers account
53 | def adwordsinit(customerId=None):
54 |     adwords_client_string='''adwords:
55 |     developer_token: developer_token
56 |     client_customer_id: client_customer_id
57 |     client_id: client_id
58 |     client_secret: client_secret
59 |     refresh_token: refresh_token'''
60 |     adwords_client = adwords.AdWordsClient.LoadFromString(adwords_client_string)
61 |     if
customerId:
62 |         adwords_client.SetClientCustomerId(customerId)
63 |     return adwords_client
64 | 
65 | 
66 | # Initialize Appsflyer
67 | def appsflyerinit():
68 |     return 'appsflyer_access_token'
69 | 
70 | 
71 | # Initialize Database
72 | def databseinit():
73 |     try:
74 |         conn = psycopg2.connect(dbname='db_name', user='user',
75 |                                 host='host', password='password',
76 |                                 port='port')
77 |         return conn
78 |     except psycopg2.Error as e:
79 |         print("I am unable to connect to the database: " + str(e))
80 |         return None
81 | 
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from drivers.performance import performanceDrivers as performancedrivers
2 | from pax.performance import performanceNewAppData as performanceNewApp
3 | from utils.paths import PATH_TO_RELATORIOS
4 | from utils.drive import load_files, upload_files
5 | import pandas
6 | import logging
7 | import warnings
8 | 
9 | warnings.filterwarnings("ignore")
10 | pandas.options.mode.chained_assignment = None
11 | 
12 | 
13 | load_files()
14 | logging.info("Secondary Files Downloaded")
15 | print('Secondary Files Downloaded')
16 | 
17 | performancedrivers().to_csv(PATH_TO_RELATORIOS + 'performancereportdrivers.csv', sep=',', float_format='%.0f', encoding='utf-8', index=False)
18 | logging.info("[DRIVER] Performance Report Generated")
19 | print('Driver - Performance Report Generated')
20 | 
21 | performanceNewApp().to_csv(PATH_TO_RELATORIOS + 'performancereportnewapp.csv', sep=',', float_format='%.0f', encoding='utf-8', index=False)
22 | logging.info("[PAX] Performance Report Generated")
23 | print('Pax - Performance Report Generated')
24 | 
25 | upload_files()
26 | logging.info("Performance Reports Files Uploaded")
27 | print('Performance Reports Files Uploaded')
--------------------------------------------------------------------------------
/pax/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/pax/__init__.py
--------------------------------------------------------------------------------
/pax/appsflyer.py:
--------------------------------------------------------------------------------
1 | import pandas
2 | import numpy
3 | import datetime
4 | import utils.time
5 | from utils.apicalls import AppsFlyer
6 | from utils.queries import QUERY_PAX_APPSFLYER_INSTALLS
7 | from init.init import databseinit
8 | from utils.paths import PATH_CUSTOS_APPSFLYER, PATH_DEFINE_VALUES, PATH_CUSTOS_GMAPS
9 | from utils.apicalls import GoogleAds
10 | import logging
11 | 
12 | def appsflyerData():
13 | 
14 |     # retrieves the AppsFlyer data and runs some ETL on it
15 | 
16 |     # first we have appsflyer_data, which holds AppsFlyer's data,
17 |     # then define_values, which is a dataframe used in the classification process.
18 |     # custos and custosgmaps define the costs for some campaigns; since costs for Google Maps and costs
19 |     # for other campaigns are calculated differently, we need two different files.
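    # For example (illustrative numbers only, not real payouts): a 'cpi' campaign with payout 2.0
    # and 100 installs is charged 100 * 2.0 further down, a 'cpa' campaign pays per first_trip,
    # and a 'cpg' (Google Maps) row is priced as sign_ups * cost_phone_activated + clicks * cost_click.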
20 |     appsflyer_data = pandas.DataFrame()
21 |     define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';')
22 |     custosgmaps = pandas.read_csv(PATH_CUSTOS_GMAPS, sep=';')
23 |     custos = pandas.read_excel(PATH_CUSTOS_APPSFLYER,
24 |                                delimiter=';', keep_default_na=False,
25 |                                na_values=['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A', 'N/A', '#NA', 'NULL',
26 |                                           'NaN', '-NaN', 'nan', '-nan'])
27 |     custos = custos.replace(r'', numpy.nan, regex=True)
28 | 
29 |     # format the custos dataframe dates, converting everything to datetime
30 |     custos['End Date'] = pandas.to_datetime(pandas.Series(custos['End Date']), format="%d/%m/%Y")
31 |     custos['Start Date'] = pandas.to_datetime(pandas.Series(custos['Start Date']), format="%d/%m/%Y")
32 | 
33 |     # config startdate and enddate and create a list of dates
34 |     start = datetime.datetime.strptime(utils.time.startdate, "%Y-%m-%d")
35 |     end = datetime.datetime.strptime(utils.time.enddate, "%Y-%m-%d")
36 |     date_list = [start + datetime.timedelta(days=x) for x in range(0, (end - start).days + 1)]
37 |     custos = custos[(custos['End Date'] > start) | (custos['End Date'].isnull())]
38 | 
39 |     # retrieve AppsFlyer installs from our database
40 |     appsflyer = retrieveAppsflyerInstalls()
41 |     appsflyer.rename(
42 |         columns={'install_time': 'date'}, inplace=True)
43 | 
44 |     # this dataframe passes daily_report; it is used for retrieving clicks
45 |     appsflyergmaps = AppsFlyer()
46 |     appsflyergmaps = appsflyergmaps.reports(utils.time.startdate, utils.time.enddate,
47 |                                             ['app_pax_android', 'app_pax_ios'],
48 |                                             ['daily_report'])
49 |     appsflyergmaps.rename(
50 |         columns={
51 |             'Date': 'date',
52 |             'Media Source (pid)': 'source',
53 |             'Campaign (c)': 'campaign',
54 |             'Clicks': 'clicks'
55 |         },
56 |         inplace=True)
57 |     appsflyergmaps['clicks'] = appsflyergmaps['clicks'].fillna(0)
58 |     appsflyergmaps.drop([x for x in list(appsflyergmaps) if x not in ['date', 'source', 'campaign', 'clicks']], inplace=True, axis=1)
59 | 
60 |     # I just need the clicks from the source "Google_Maps"
61 |     appsflyer = pandas.concat([appsflyer, appsflyergmaps[appsflyergmaps['source'] == 'Google_Maps']])
62 | 
63 |     # AppsFlyer events from the orange and degrade apps; the event is af_first_trip
64 |     appsflyerfirst = AppsFlyer()
65 |     appsflyerfirst = appsflyerfirst.reports(utils.time.startdate, utils.time.enddate,
66 |                                             ['app_pax_android', 'app_pax_ios'],
67 |                                             ['in_app_events_report', 'organic_in_app_events_report'],
68 |                                             ['event_time', 'media_source', 'campaign', 'event_name'],
69 |                                             ['app_pax_event'])
70 |     appsflyerfirst['first_trip'] = 1
71 |     appsflyerfirst.drop(['Event Name'], inplace=True, axis=1)
72 |     appsflyerfirst['Campaign'] = appsflyerfirst['Campaign'].fillna('None')
73 |     appsflyerfirst['Media Source'] = appsflyerfirst['Media Source'].fillna('NA')
74 |     appsflyerfirst['Event Time'] = appsflyerfirst['Event Time'].apply(lambda x: str(x).split(' ')[0])
75 |     appsflyerfirst = appsflyerfirst.groupby(['Event Time', 'Media Source', 'Campaign', 'os_name']).sum()
76 |     appsflyerfirst = appsflyerfirst.reset_index()
77 | 
78 |     appsflyerfirst.rename(
79 |         columns={
80 |             'Event Time': 'date',
81 |             'Media Source': 'source',
82 |             'Campaign': 'campaign'
83 |         }, inplace=True)
84 | 
85 | 
86 |     # AppsFlyer events from the orange and degrade apps; the event is af_sign_up
87 |     appsflyersignup = AppsFlyer()
88 |     appsflyersignup = appsflyersignup.reports(utils.time.startdate, utils.time.enddate,
89 |                                               ['app_pax_android', 'app_pax_ios'],
90 |                                               ['in_app_events_report', 'organic_in_app_events_report'],
91 |                                               ['event_time', 'media_source',
'campaign', 'event_name'], 92 | ['app_pax_event_2']) 93 | appsflyersignup['sign_ups'] = 1 94 | 95 | appsflyersignup.drop(['Event Name'], inplace=True, axis=1) 96 | appsflyersignup['Campaign'] = appsflyersignup['Campaign'].fillna('None') 97 | appsflyersignup['Media Source'] = appsflyersignup['Media Source'].fillna('NA') 98 | appsflyersignup['Event Time'] = appsflyersignup['Event Time'].apply(lambda x: str(x).split(' ')[0]) 99 | appsflyersignup = appsflyersignup.groupby(['Event Time', 'Media Source', 'Campaign', 'os_name']).sum() 100 | appsflyersignup = appsflyersignup.reset_index() 101 | 102 | appsflyersignup.rename( 103 | columns={ 104 | 'Event Time': 'date', 105 | 'Media Source': 'source', 106 | 'Campaign': 'campaign' 107 | }, inplace=True) 108 | 109 | appsflyerevents = pandas.concat([appsflyerfirst, appsflyersignup]) 110 | appsflyer = pandas.concat([appsflyerevents, appsflyer]) 111 | 112 | google = GoogleAds() 113 | google = google.reportcampaigns(customerId='771-742-8350') 114 | google.rename( 115 | columns={ 116 | 'Campaign': 'campaign_name', 117 | 'Campaign ID': 'campaign' 118 | }, inplace=True) 119 | 120 | google = google.drop_duplicates(subset=['campaign']) 121 | appsflyer = pandas.merge(appsflyer, google, how='left', left_on='campaign', right_on='campaign', indicator=True) 122 | appsflyer['campaign'][appsflyer['_merge'] == 'both'] = appsflyer['campaign_name'] 123 | appsflyer.drop(['campaign_name', '_merge'], inplace=True, axis=1) 124 | appsflyer['tool'] = 'AppsFlyer' 125 | 126 | appsflyer['midia'] = 'unpaid' 127 | define_values = define_values[define_values['app'] == 'pax'] 128 | for i in ['source', 'midia']: 129 | define_aux = define_values[~define_values[i].isna()] 130 | for rows in [tuple(x) for x in define_aux.values]: 131 | if str(rows[2]) == 'nan': 132 | appsflyer[i][appsflyer['campaign'].str.contains(rows[3], na=False)] = rows[ 133 | 1 if i == 'source' else 0] 134 | else: 135 | appsflyer[i][appsflyer['source'].str.contains(rows[2], na=False)] = rows[ 136 | 1 if i == 'source' else 0] 137 | 138 | appsflyer = appsflyer[['date','tool','midia','source','os_name','campaign','adgroup','creative','installs', 'first_trip', 'sign_ups', 'clicks']] 139 | 140 | appsflyer['date'] = pandas.to_datetime(pandas.Series(appsflyer['date']), format="%Y-%m-%d") 141 | for date in date_list: 142 | # concat everything on the go 143 | appsflyer_data = pandas.concat([appsflyer_data, 144 | pandas.merge( 145 | (appsflyer[appsflyer['date'] == date]), 146 | (custos[((custos['End Date'] >= date) | (custos['End Date'].isnull())) & ( 147 | custos['Start Date'] <= date)]), 148 | how='left', 149 | left_on='campaign', 150 | right_on='campaign')]) 151 | appsflyer_data = pandas.merge(appsflyer_data, custosgmaps, how='left', left_on='source', right_on='source') 152 | 153 | appsflyer_data['amount_spent'] = 0 154 | appsflyer_data['sign_ups'] = appsflyer_data['sign_ups'].fillna(0) 155 | appsflyer_data['clicks'] = appsflyer_data['clicks'].fillna(0) 156 | appsflyer_data['amount_spent'][appsflyer_data['typeg'] == 'cpg'] = round((appsflyer_data['sign_ups'].astype(float) * 157 | appsflyer_data['cost_phone_activated'].astype( 158 | float)) + ( 159 | appsflyer_data['clicks'].astype(float) * 160 | appsflyer_data['cost_click'].astype( 161 | float))) 162 | 163 | appsflyer_data['amount_spent'][appsflyer_data['type'] == 'cpi'] = appsflyer_data['installs'].astype(float) * appsflyer_data['payout'].astype(float) 164 | appsflyer_data['amount_spent'][appsflyer_data['type'] == 'cpa'] = appsflyer_data['first_trip'].astype(float) * 
appsflyer_data['payout'].astype(float) 165 | appsflyer_data.drop(['type', 'typeg'], inplace=True, axis=1) 166 | 167 | 168 | 169 | appsflyer_data['amount_spent'] = appsflyer_data['amount_spent'].astype(str) 170 | appsflyer_data['amount_spent'] = appsflyer_data['amount_spent'].apply(lambda x: str(x.replace('nan', '0.0'))) 171 | appsflyer_data['amount_spent'] = appsflyer_data['amount_spent'].apply(lambda x: str(x.replace('.', ','))) 172 | appsflyer_data.drop(['payout', 'Start Date', 'End Date', 'cost_phone_activated', 'cost_click', 'clicks'], inplace=True, axis=1) 173 | 174 | logging.info("[PAX] Appsflyer Updated") 175 | print('Pax - Appsflyer Updated') 176 | return appsflyer_data.drop_duplicates().reset_index(drop=True) 177 | 178 | 179 | def retrieveAppsflyerInstalls(): 180 | conn = databseinit() 181 | curNewUsers = conn.cursor() 182 | curNewUsers.execute(QUERY_PAX_APPSFLYER_INSTALLS) 183 | col_names = [] 184 | for x in curNewUsers.description: 185 | col_names.append(x[0]) 186 | newusers = pandas.DataFrame(curNewUsers.fetchall(), columns=col_names) 187 | return newusers 188 | -------------------------------------------------------------------------------- /pax/database.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | from init.init import databseinit as databaseinit 3 | from utils.time import startdate 4 | from utils.time import enddate 5 | from utils.queries import QUERY_PAX_NEWAPP 6 | from utils.paths import PATH_DEFINE_VALUES 7 | import numpy 8 | import logging 9 | 10 | 11 | def baseDatabase(): 12 | #reportnewApppaxData = pandas.read_csv(r'/Users/nicolasmelo1/Desktop/ToDab Corrected.csv', sep=',') 13 | define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';') 14 | reportnewApppaxData = extractDatabasePFTsNewApp() 15 | if reportnewApppaxData.empty: 16 | reports = reportnewApppaxData 17 | reports['date'] = None 18 | reports['tool'] = None 19 | reports['midia'] = None 20 | reports['source'] = None 21 | reports['pax'] = None 22 | reports['trips'] = None 23 | reports['burn'] = None 24 | reports['gmv'] = None 25 | reports['takerate'] = None 26 | reports['pft_pax'] = None 27 | reports['pft_trips'] = None 28 | reports['pft_burn'] = None 29 | reports['pft_gmv'] = None 30 | reports['pft_takerate'] = None 31 | reports['region'] = None 32 | return reports 33 | reportnewApppaxData.columns = ['call_date', 'channel', 'metropolitan_area_name', 'pax', 'trips', 'burn', 'gmv', 34 | 'takerate', 'call_date.1', 'channel.1', 'metropolitan_area_name.1', 'pft_pax', 35 | 'pft_trips', 'pft_burn', 'pft_gmv', 'pft_takerate'] 36 | 37 | reportsPFT = pandas.DataFrame() 38 | reportsPFT = reportsPFT.append(reportnewApppaxData) 39 | reportnewApppaxData.drop( 40 | ['call_date.1', 'channel.1', 'metropolitan_area_name.1', 'pft_pax', 'pft_trips', 'pft_burn', 'pft_gmv', 41 | 'pft_takerate'], inplace=True, axis=1) 42 | reportsPFT.drop(['call_date', 'channel', 'metropolitan_area_name', 'pax', 'trips', 'burn', 'gmv', 'takerate'], 43 | inplace=True, axis=1) 44 | 45 | reportsPFT.rename(columns={'call_date.1': 'call_date', 'channel.1': 'channel', 46 | 'metropolitan_area_name.1': 'metropolitan_area_name'}, inplace=True) 47 | 48 | reportsPFT = reportsPFT.dropna(axis=0, how='all') 49 | 50 | reports = pandas.concat([reportnewApppaxData, reportsPFT]) 51 | reports['channel'] = reports['channel'].replace(r'', numpy.nan, regex=True) 52 | reports = reports.replace(numpy.nan, 0, regex=True) 53 | reports['channel'][reports['channel'] == 0] = 'Glispa' 54 | 
reports['metropolitan_area_name'][reports['metropolitan_area_name'] == 0] = 'BR' 55 | reports['metropolitan_area_name'][reports['metropolitan_area_name'].str.contains('Campos')] = 'SAO JOSE DOS CAMPOS' 56 | reports['metropolitan_area_name'] = reports['metropolitan_area_name'].str.upper() 57 | 58 | reports.rename(columns={'channel': 'source'}, inplace=True) 59 | 60 | reports['source'][reports['source'].str.contains('google')] = 'Google' 61 | 62 | 63 | reports['tool'] = 'Base' 64 | reports['midia'] = 'unpaid' 65 | define_values = define_values[(define_values['app'] == 'pax') & (define_values['source'].isna()) & ( 66 | define_values['campaign contains'].isna())] 67 | for rows in [tuple(x) for x in define_values.values]: 68 | reports['midia'][reports['source'].str.contains(rows[2], na=False)] = rows[0] 69 | 70 | #reports['midia'][reports['campaign'].str.contains('Perf')] = 'paid' 71 | #reports['midia'][reports['campaign'].str.contains('Brand')] = 'paid' 72 | reports['midia'][reports['source'].str.contains('Organic')] = 'organic' 73 | 74 | reports['call_date'] = pandas.to_datetime(pandas.Series(reports['call_date']), format="%Y-%m-%d") 75 | #reports['week'] = reports['call_date'].dt.week 76 | 77 | reports.rename(columns={'call_date': 'date', 78 | 'metropolitan_area_name': 'region'}, inplace=True) 79 | reports = reports[ 80 | ['date', 'tool', 'midia', 'source', 'pax', 'trips', 'burn', 'gmv', 'takerate', 'pft_pax', 81 | 'pft_trips', 'pft_burn', 'pft_gmv', 'pft_takerate', 'region']] 82 | reports = reports[(reports['date'] >= startdate) & (reports['date'] <= enddate)] 83 | logging.info("[PAX] Database Updated") 84 | print('Pax - Database Updated') 85 | return reports 86 | 87 | 88 | 89 | def extractDatabasePFTsNewApp(): 90 | conn = databaseinit() 91 | curNewUsers = conn.cursor() 92 | curNewUsers.execute(QUERY_PAX_NEWAPP) 93 | newusers = pandas.DataFrame(curNewUsers.fetchall()) 94 | return newusers 95 | 96 | -------------------------------------------------------------------------------- /pax/facebook.py: -------------------------------------------------------------------------------- 1 | from utils.apicalls import FacebookAds 2 | import utils.time 3 | import logging 4 | # Data organization and return full Facebook Data 5 | 6 | 7 | def facebookdata(ad_accounts): 8 | facebook = FacebookAds() 9 | facebook = facebook.reports(date_start=utils.time.startdate, date_end=utils.time.enddate, ad_accounts=ad_accounts, extracted=list()) 10 | facebook['tool'] = 'FaceAds' 11 | facebook['source'] = 'Facebook Ads' 12 | facebook['os_name'] = 'android' 13 | facebook['midia'] = 'paid' 14 | facebook['os_name'][facebook['campaign_name'].str.contains('iOS')] = 'ios' 15 | facebook['os_name'][facebook['campaign_name'].str.contains('IOS')] = 'ios' 16 | 17 | facebook.rename(columns={'date_start': 'date', 'campaign_name': 'campaign', 'adset_name': 'adgroup', 18 | 'ad_name': 'creative', 'spend': 'amount_spent'}, inplace=True) 19 | facebook = facebook[ 20 | ['date', 'tool', 'midia', 'source', 'os_name', 'campaign', 'adgroup', 'creative', 'amount_spent', 'impressions', 21 | 'clicks']] 22 | 23 | logging.info("[PAX] Facebook Updated") 24 | print('Pax - Facebook Updated') 25 | return facebook 26 | -------------------------------------------------------------------------------- /pax/google.py: -------------------------------------------------------------------------------- 1 | from utils.apicalls import GoogleAds 2 | from utils.paths import PATH_DEFINE_VALUES 3 | import utils.time 4 | import pandas 5 | import logging 6 | 7 | def 
googlereports(customer_ids): 8 | reports=pandas.DataFrame() 9 | for customer_id in customer_ids: 10 | data = GoogleAds() 11 | data = data.reports(dateStart=utils.time.startdatebase, dateEnd=utils.time.enddatebase, customerId=customer_id) 12 | 13 | data.drop(['Interactions'], inplace=True, axis=1) 14 | #Column creation 15 | data['midia'] = 'paid' 16 | data['source'] = 'Google' 17 | data['os_name'] = 'android' 18 | 19 | #Convert values based on data on created tables 20 | define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';') 21 | define_values = define_values[(define_values['app'] == 'pax') & (~define_values['source'].isna()) & (~define_values['campaign contains'].isna())] 22 | 23 | for rows in [tuple(x) for x in define_values.values]: 24 | data['source'][data['Campaign'].str.contains(rows[3], na=False)] = rows[1] 25 | 26 | 27 | data['os_name'][data['Campaign'].str.contains('iOS')] = 'ios' 28 | data.rename( 29 | columns={'Day': 'date', 30 | 'Campaign': 'campaign', 31 | 'Cost': 'amount_spent', 32 | 'Impressions': 'impressions', 33 | 'Clicks': 'clicks' 34 | }, inplace=True) 35 | data['tool'] = 'Adwords' 36 | reports = pandas.concat([reports, data]) 37 | 38 | reports = reports[['date', 'tool', 'midia', 'source', 'os_name', 39 | 'campaign', 'amount_spent', 'impressions', 'clicks']] 40 | logging.info("[PAX] Google Updated") 41 | print('Pax - Google Updated') 42 | return reports 43 | -------------------------------------------------------------------------------- /pax/performance.py: -------------------------------------------------------------------------------- 1 | from pax.appsflyer import appsflyerData 2 | from pax.facebook import facebookdata 3 | from pax.google import googlereports 4 | from pax.database import baseDatabase 5 | from utils.paths import PATH_SIGLAS_PRACAS 6 | from pax.twitter import twitterdata 7 | import unidecode 8 | import pandas 9 | import utils.time 10 | pandas.options.mode.chained_assignment = None 11 | 12 | 13 | def cleanNewAppData(performance): 14 | 15 | performance['os_name'][performance['campaign'].str.contains('iOS', na=False)] = 'ios' 16 | performance['os_name'][performance['campaign'].str.contains('IOS', na=False)] = 'ios' 17 | 18 | performance['campaign'][performance['campaign'] == 'GDN_And_MKT_BH-CPI924382686'] = 'GDN_And_MKT_BH-CPI' 19 | performance['campaign'][performance['campaign'] == 'GDN_And_MKT_VIX-CPC931642203'] = 'GDN_And_MKT_VIX-CPC' 20 | performance['campaign'][performance['campaign'] == 'GDN_And_MKT_VIX-CPI930974807'] = 'GDN_And_MKT_VIX-CPI' 21 | performance['campaign'][performance['campaign'] == 'SMS_All_Ops_CWB/market://details?id=com.app99.pax'] = 'SMS_All_Ops_CWB' 22 | performance['campaign'][performance['campaign'] == 'Spotify_All_Brand_BH-overlay'] = 'Spotify_All_Brand_BH' 23 | performance['campaign'][performance['campaign'] == 'SRC_And_Conc-GYN_UA929042102'] = 'SRC_And_Conc-GYN_UA' 24 | 25 | performance['campaign'][performance['campaign'] == 'RG-CWB-IOS-AppInstal'] = 'RG-CWB-IOS-AppInstall' 26 | 27 | if 'region' in performance.columns: 28 | region = pandas.read_csv(PATH_SIGLAS_PRACAS, sep=';') 29 | region['pracas'] = region['pracas'].str.upper() 30 | listofregions = [tuple(x) for x in region.values] 31 | 32 | performance['region'][performance['region'].isnull()] = 'BR' 33 | for i in range(0, len(listofregions)): 34 | performance['region'][performance['campaign'].str.contains(listofregions[i][0], na=False)] = \ 35 | listofregions[i][1] 36 | 37 | performance['region'] = performance['region'].apply(lambda x: unidecode.unidecode(x)) 38 | 39 
| performance['week'] = performance['date'].dt.week 40 | 41 | return performance 42 | 43 | 44 | def performanceNewAppData(): 45 | performance = pandas.concat([ 46 | appsflyerData(), 47 | twitterdata(), 48 | facebookdata(['act_1894184000615284', 'act_1691552937545059', 'act_967083766658650']), 49 | googlereports(['771-742-8350', '411-922-6657']), 50 | baseDatabase() 51 | ]) 52 | performance = cleanNewAppData(performance) 53 | performance = performance[ 54 | ['date', 'week', 'tool', 'midia', 'source', 'os_name', 'campaign', 'adgroup', 'creative', 55 | 'installs', 'first_trip', 'sign_ups', 'amount_spent', 'impressions', 'clicks', 'pax', 'trips', 'burn', 'gmv', 56 | 'takerate', 'pft_pax', 57 | 'pft_trips', 'pft_burn', 'pft_gmv', 'pft_takerate', 'region']] 58 | return performance 59 | 60 | 61 | -------------------------------------------------------------------------------- /pax/twitter.py: -------------------------------------------------------------------------------- 1 | from utils.time import startdate 2 | from utils.apicalls import TwitterAds 3 | import utils.time 4 | import logging 5 | 6 | 7 | def twitterdata(): 8 | twitter = TwitterAds() 9 | twitter = twitter.reports(dateStart=utils.time.startdate, dateEnd=utils.time.enddate) 10 | twitter.drop(['id'], inplace=True, axis=1) 11 | twitter['tool'] = 'TwitterAds' 12 | twitter['source'] = 'Twitter' 13 | twitter['os_name'] = 'android' 14 | twitter['midia'] = 'paid' 15 | twitter['os_name'][twitter['name'].str.contains('iOS')] = 'ios' 16 | twitter['os_name'][twitter['name'].str.contains('IOS')] = 'ios' 17 | 18 | twitter.rename(columns={'billed_charge_local_micro': 'amount_spent', 'name': 'campaign'}, inplace=True) 19 | 20 | twitter = twitter[ 21 | ['date', 'tool', 'midia', 'source', 'os_name', 'campaign', 'amount_spent', 'impressions', 22 | 'clicks']] 23 | 24 | logging.info("[PAX] Twitter Updated") 25 | print('Pax - Twitter Updated') 26 | return twitter 27 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | attrs==18.1.0 3 | cached-property==1.4.3 4 | cachetools==2.1.0 5 | certifi==2018.4.16 6 | chardet==3.0.4 7 | defusedxml==0.5.0 8 | facebook-business==3.0.0 9 | facebookads==2.11.4 10 | google-api-python-client==1.7.3 11 | google-auth==1.5.0 12 | google-auth-httplib2==0.0.3 13 | google-auth-oauthlib==0.2.0 14 | googleads==12.1.0 15 | httplib2==0.11.3 16 | idna==2.7 17 | isodate==0.6.0 18 | lxml==4.2.1 19 | numpy==1.14.5 20 | oauth2client==4.1.2 21 | oauthlib==2.1.0 22 | pandas==0.23.1 23 | psycopg2==2.7.5 24 | pyasn1==0.4.3 25 | pyasn1-modules==0.2.1 26 | PyDrive==1.3.1 27 | python-dateutil==2.7.3 28 | pytz==2018.4 29 | PyYAML==3.12 30 | requests==2.19.1 31 | requests-oauthlib==1.0.0 32 | requests-toolbelt==0.8.0 33 | rsa==3.4.2 34 | six==1.11.0 35 | suds-jurko==0.6 36 | Unidecode==1.0.22 37 | uritemplate==3.0.0 38 | urllib3==1.23 39 | xmltodict==0.11.0 40 | zeep==3.0.0 41 | -------------------------------------------------------------------------------- /staticfiles/reports/performancereportdrivers.csv: -------------------------------------------------------------------------------- 1 | date,week,tool,midia,source,os_name,campaign,adgroup,creative,installs,criou_basic,enviou_todos_docs,amount_spent,impressions,clicks,signups,signups_with_migration,regulars,regulars_with_migration,dft,dft_with_migration,region -------------------------------------------------------------------------------- 
/staticfiles/reports/performancereportnewapp.csv: -------------------------------------------------------------------------------- 1 | date,week,tool,midia,source,os_name,campaign,adgroup,creative,installs,first_trip,sign_ups,amount_spent,impressions,clicks,pax,trips,burn,gmv,takerate,pft_pax,pft_trips,pft_burn,pft_gmv,pft_takerate,region -------------------------------------------------------------------------------- /staticfiles/tables/custos_appsflyer_newapp.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/staticfiles/tables/custos_appsflyer_newapp.xlsx -------------------------------------------------------------------------------- /staticfiles/tables/custos_gmaps.csv: -------------------------------------------------------------------------------- 1 | source;typeg;cost_click;cost_phone_activated -------------------------------------------------------------------------------- /staticfiles/tables/media_and_source_by_source_and_campaign_names.csv: -------------------------------------------------------------------------------- 1 | midia;source;source contains;campaign contains;app 2 | ;Exemple;;Exemp;pax 3 | pago;;Exemp;GDN;pax -------------------------------------------------------------------------------- /staticfiles/tables/siglas_pracas.csv: -------------------------------------------------------------------------------- 1 | sigla;pracas 2 | SJC;Sao Jose dos Campos -------------------------------------------------------------------------------- /staticfiles/tables/source_and_campaigns_by_driverchanelid.csv: -------------------------------------------------------------------------------- 1 | driver_channel;source_name;campaign_name 2 | 100000;Example;Example -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/utils/__init__.py -------------------------------------------------------------------------------- /utils/apicalls.py: -------------------------------------------------------------------------------- 1 | from facebook_business.adobjects import adsinsights 2 | from facebook_business.adobjects import adaccount 3 | from init.init import adwordsinit 4 | from init.init import twitterinit 5 | from init.init import facebookinit 6 | from init.init import appsflyerinit 7 | from pandas.io.json import json_normalize 8 | import requests 9 | import pandas 10 | import datetime 11 | import io 12 | import time 13 | import random 14 | import facebookads.exceptions 15 | 16 | ######################################### 17 | #### #### 18 | #### This is for extracting data #### 19 | #### #### 20 | ######################################### 21 | 22 | 23 | # class to make facebook api calls 24 | class FacebookAds: 25 | def __init__(self): 26 | facebookinit() 27 | 28 | def reports(self, date_start, date_end, ad_accounts, extracted, reports=pandas.DataFrame()): 29 | ad_account = random.choice(ad_accounts) 30 | while ad_account in extracted: 31 | ad_account = random.choice(ad_accounts) 32 | 33 | print("[Facebook] - Extracting data for ad_account=%s" % (ad_account)) 34 | account = adaccount.AdAccount(ad_account) 35 | 36 | insights = account.get_insights(fields=[ 37 | adsinsights.AdsInsights.Field.date_start, 38 | 
adsinsights.AdsInsights.Field.campaign_name,
39 |             adsinsights.AdsInsights.Field.adset_name,
40 |             adsinsights.AdsInsights.Field.ad_name,
41 |             adsinsights.AdsInsights.Field.impressions,
42 |             adsinsights.AdsInsights.Field.clicks,
43 |             adsinsights.AdsInsights.Field.spend,
44 |         ], params={
45 |             'level': adsinsights.AdsInsights.Level.ad,
46 |             'time_increment': '1',
47 |             'time_range': {
48 |                 'since': date_start,
49 |                 'until': date_end
50 |             },
51 |         }, async=True)  # NOTE: 'async' became a reserved word in Python 3.7; newer facebook_business releases renamed this parameter
52 | 
53 |         time.sleep(20)
54 | 
55 |         results = []
56 |         async_job = insights.remote_read()
57 |         while async_job['async_status'] != 'Job Completed':
58 |             print('[Facebook] - Percent completed from async run=' + str(async_job['async_percent_completion']))
59 |             time.sleep(20)
60 |             async_job = insights.remote_read()
61 |         if async_job['async_status'] == 'Job Completed':
62 |             print('[Facebook] - Percent completed from async run=' + str(async_job['async_percent_completion']))
63 |             time.sleep(20)
64 |             results = [x for x in insights.get_result()]
65 | 
66 |         if results:
67 |             facebook = pandas.DataFrame(results,
68 |                                         columns=['ad_name', 'adset_name', 'campaign_name', 'clicks', 'date_start',
69 |                                                  'date_stop', 'impressions', 'spend'])
70 |             facebook['spend'] = facebook['spend'].apply(lambda x: str(x.replace('.', ',')))
71 |             facebook = facebook[
72 |                 ['date_start', 'date_stop', 'campaign_name', 'adset_name', 'ad_name', 'impressions', 'clicks', 'spend']]
73 |             facebook.drop(['date_stop'], inplace=True, axis=1)
74 |             facebook['date_start'] = pandas.to_datetime(pandas.Series(facebook['date_start']), format="%Y-%m-%d")
75 |             facebook = facebook.sort_values(by='date_start')
76 |             reports = pandas.concat([reports, facebook])
77 | 
78 |         else:
79 |             facebook = pandas.DataFrame(results,
80 |                                         columns=['ad_name', 'adset_name', 'campaign_name', 'clicks', 'date_start',
81 |                                                  'date_stop', 'impressions', 'spend'])
82 |             reports = pandas.concat([reports, facebook])
83 | 
84 |         extracted.append(ad_account)
85 |         if sorted(extracted) != sorted(ad_accounts):
86 |             return self.reports(date_start, date_end, ad_accounts, extracted, reports)
87 |         else:
88 |             return reports
89 | 
90 | # class to make google api calls
91 | class GoogleAds:
92 | 
93 |     def reports(self, dateStart, dateEnd, customerId=None):
94 |         print("[Google] - Extracting reports data from customer_id=%s" % (customerId))
95 |         # initialize adwords
96 |         adwords = adwordsinit(customerId)
97 | 
98 |         # watch the version: right now it is 'v201802', and it needs to be updated constantly as old AdWords API versions are sunset.
99 |         report_downloader = adwords.GetReportDownloader(version='v201802')
100 | 
101 |         # it's just a query; you can check more about the parameters here:
102 |         # https://developers.google.com/adwords/api/docs/appendix/reports/campaign-performance-report?hl=pt-br#bidtype
103 |         report_query = ('SELECT Date, CampaignName, Impressions, Interactions, Clicks, Cost '
104 |                         'FROM CAMPAIGN_PERFORMANCE_REPORT '
105 |                         'DURING ' + dateStart + ', ' + dateEnd)
106 | 
107 |         stream_data = report_downloader.DownloadReportAsStreamWithAwql(report_query, 'CSV')
108 | 
109 |         # the API returns a raw CSV stream, not a dataframe, so convert it with pandas
110 | data = pandas.read_csv(stream_data, 111 | sep=str(','), 112 | encoding='utf-8-sig', 113 | header=1, 114 | error_bad_lines=False, 115 | warn_bad_lines=False) 116 | 117 | # Cost comes in micros: the real value is the number divided by 1,000,000 118 | data['Cost'] = data['Cost'].apply(lambda x: round(x/1000000)) 119 | 120 | # Drop the last line (the row with Google's total values) 121 | data = data[:-1] 122 | 123 | # Convert to datetime and sort it (the 'Date' field is labelled 'Day' in the downloaded CSV) 124 | data['Day'] = pandas.to_datetime(pandas.Series(data['Day']), format="%Y-%m-%d") 125 | data = data.sort_values(by='Day') 126 | return data 127 | 128 | 129 | # Google API call to extract campaign names with campaign ids 130 | def reportcampaigns(self, customerId=None): 131 | print("[Google] - Extracting campaigns from customer_id=%s" % (customerId)) 132 | adwords = adwordsinit(customerId) 133 | report_downloader = adwords.GetReportDownloader(version='v201802') 134 | report_query = ('SELECT CampaignName, CampaignId ' 135 | 'FROM CAMPAIGN_PERFORMANCE_REPORT') 136 | stream_data = report_downloader.DownloadReportAsStreamWithAwql(report_query, 'CSV') 137 | 138 | data = pandas.read_csv(stream_data, 139 | sep=str(','), 140 | encoding='utf-8-sig', 141 | header=1, 142 | error_bad_lines=False, 143 | warn_bad_lines=False) 144 | data = data[:-1] 145 | return data 146 | 147 | 148 | # class to make Twitter API calls 149 | class TwitterAds: 150 | 151 | # to get Twitter campaigns through the Twitter API you'll have to get your hands "dirty": 152 | # unlike the Google or Facebook APIs, the Twitter Ads API is quite new and doesn't come with a lot of tools out of the box, 153 | # so you need to build them yourself 154 | def reportcampaigns(self, dateStart): 155 | # this gets all the campaigns and campaign ids that we created with the account 156 | getcampaigns = 'https://ads-api.twitter.com/2/accounts/18ce54np2w4/campaigns' 157 | content = requests.get(getcampaigns, auth=twitterinit()).json() 158 | campaigns = json_normalize(content, ['data']) 159 | 160 | campaigns.drop(['updated_at', 'total_budget_amount_local_micro', 'start_time', 'standard_delivery', 'servable', 161 | 'funding_instrument_id', 'frequency_cap', 'entity_status', 'duration_in_days', 'deleted', 162 | 'daily_budget_amount_local_micro', 'currency', 'created_at', 'account_id'], 163 | inplace=True, 164 | axis=1) 165 | campaigns['end_time'] = campaigns['end_time'].apply(lambda x: str(x).split('T')[0] if x is not None else None) 166 | campaigns['end_time'] = pandas.to_datetime(pandas.Series(campaigns['end_time']), format="%Y-%m-%d") 167 | 168 | campaigns = campaigns[(campaigns['end_time'] >= dateStart) | (campaigns['reasons_not_servable'] != 'EXPIRED')] 169 | campaigns.drop(['end_time', 'reasons_not_servable'], inplace=True, axis=1) 170 | return campaigns 171 | 172 | 173 | def recursiveextractor(self, datestart, dateend, campaignslist, placement, twitterdataframe=pandas.DataFrame()): 174 | 175 | # this is the coolest part of the program: by default Twitter doesn't give back 176 | # data divided by date, so I need to do it myself. 177 | # I request data from Twitter in 24h ranges and loop through this func until datestart is equal to dateend 178 | 
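# Illustrative walk-through (assumed dates): with datestart='2018-05-01' the code below
# computes lastdate='2018-05-02', requests the 24h window between them, tags the rows
# with datestart, then recurses with datestart='2018-05-02' until it reaches dateend.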
179 | lastdate = datetime.datetime.strptime(datestart, "%Y-%m-%d") + datetime.timedelta(days=1) 180 | lastdate = str(lastdate).partition(" ")[0] 181 | # make the call 182 | content = requests.get('https://ads-api.twitter.com/2/stats/accounts/18ce54np2w4/', auth=twitterinit(), 183 | params={ 184 | 'start_time': datestart+'T00:00:00-0300', 185 | 'end_time': str(lastdate)+'T00:00:00-0300', 186 | 'entity': 'CAMPAIGN', 187 | 'granularity': 'TOTAL', 188 | 'metric_groups': 'ENGAGEMENT,BILLING', 189 | 'placement': placement, 190 | 'entity_ids': campaignslist 191 | }).json() 192 | 193 | # some JSON partitioning and normalization to convert it to a dataframe 194 | campaignid = json_normalize(content, ['data']) 195 | campaignid.drop(['id_data'], inplace=True, axis=1) 196 | twitterreport = json_normalize(content['data'], 'id_data') 197 | twitterreport = pandas.concat( 198 | [twitterreport.drop('metrics', axis=1), pandas.DataFrame(twitterreport['metrics'].tolist())], axis=1) 199 | 200 | # drop what we don't need 201 | twitterreport.drop(['card_engagements', 'carousel_swipes', 'engagements', 'follows', 'likes', 'poll_card_vote', 202 | 'qualified_impressions', 'billed_engagements', 203 | 'replies', 'app_clicks', 'segment', 'tweets_send', 'url_clicks', 'retweets'], inplace=True, axis=1) 204 | 205 | # as the date is something i've added myself, to get this data divided by date i need to put it on the DF myself 206 | twitterreport['date'] = datestart 207 | 208 | 209 | # some ETL on the metrics fields (the values come as lists, with spend in micros) 210 | twitterreport['billed_charge_local_micro'] = twitterreport['billed_charge_local_micro'].apply( 211 | lambda x: 0 if x is None else int(round(sum(x) / 1000000))) 212 | twitterreport['impressions'] = twitterreport['impressions'].apply(lambda x: 0 if x is None else sum(x)) 213 | twitterreport['clicks'] = twitterreport['clicks'].apply(lambda x: 0 if x is None else sum(x)) 214 | 215 | twitterreport = pandas.concat([campaignid, twitterreport], axis=1) 216 | 217 | 218 | # twitterdataframe accumulates the dataframes from each day 219 | twitterdataframe = pandas.concat([twitterdataframe, twitterreport], ignore_index=True) 220 | 221 | if dateend == datestart: 222 | return twitterdataframe 223 | else: 224 | return self.recursiveextractor(str(lastdate), dateend, campaignslist, placement, twitterdataframe) 225 | 226 | 227 | def reports(self, dateStart, dateEnd): 228 | print("[Twitter] - Extracting twitter data") 229 | # this is what gets called on a TwitterAds object to retrieve the data; 230 | # you only need to pass the start date and end date parameters. 231 | # first you create an empty dataframe that'll hold the data from the for loop 232 | twitter = pandas.DataFrame() 233 | 234 | # reportcampaigns returns a dataframe with the following columns: campaign id and campaign name. 235 | # in order to extract the performance data, you have to pass the ids of the campaigns you want values for, 236 | # so we extract all of our campaigns in this account 237 | twitterCampaigns = self.reportcampaigns(dateStart) 238 | 239 | # excededNumberOfCampaigns says that the number of campaigns we're trying to retrieve data for is bigger than 20. 240 | # if we have more than 20 campaigns running we need to split them into batches, as sketched below. 
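# A plainer sketch of the same 20-per-call batching (illustrative only, not the code path used below):
#   for i in range(0, len(twitterCampaignsList), 20):
#       batch = ','.join(twitterCampaignsList[i:i + 20])
#       # ...call recursiveextractor once per batch and concat the results...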
241 | excededNumberOfCampaigns = None 242 | 243 | # this converts the campaign ids to a list 244 | twitterCampaignsList = twitterCampaigns['id'].tolist() 245 | 246 | 247 | # the placement says where you want the campaigns from: 248 | # "we have campaigns that can be distributed on our platform or outside it, with third party publishers". 249 | # as we want both, we create a list containing both. 250 | # we can't make the twitter call passing 'ALL_ON_TWITTER' and 'PUBLISHER_NETWORK' at the same time, 251 | # so we will need to make two calls 252 | placements = ['ALL_ON_TWITTER', 'PUBLISHER_NETWORK'] 253 | 254 | 255 | # where the magic happens: first we loop through the placements 256 | for placement in placements: 257 | twitterReportsDataFrame = pandas.DataFrame() 258 | 259 | # as said above, if the list is bigger than 20 it needs to be split, and that's where the campaignsList variable comes in 260 | if len(twitterCampaignsList) > 20: 261 | excededNumberOfCampaigns = True 262 | campaignsList = twitterCampaignsList[:20] 263 | else: 264 | campaignsList = twitterCampaignsList 265 | 266 | # list to string, so the call can be made 267 | campaigns = ','.join(campaignsList) 268 | 269 | # as said above, if excededNumberOfCampaigns is True the list needs to be split. 270 | # the idea is simple: while twitterCampaignsList exists and isn't an empty list, 271 | # you only want the values that aren't in campaignsList yet, which is why the variable is overwritten with new values; 272 | # then, if twitterCampaignsList is still bigger than 20, you split it again, consecutively. 273 | # the aux variable is needed so twitterCampaignsList isn't overwritten and the next placement can start over. 274 | # note that you leave the while loop when excededNumberOfCampaigns turns False, i.e. when campaignsList has 275 | # 20 items or fewer; the final call happens outside the if clause, on the next step 276 | if excededNumberOfCampaigns is True: 277 | auxTwitterCampaignsList = twitterCampaignsList 278 | while excededNumberOfCampaigns is True: 279 | twitterReportsDataFrame = pandas.concat([twitterReportsDataFrame, self.recursiveextractor(dateStart, 280 | dateEnd, 281 | campaigns, 282 | placement)]) 283 | auxTwitterCampaignsList = [x for x in auxTwitterCampaignsList if x not in campaignsList] 284 | if len(auxTwitterCampaignsList) > 20: 285 | excededNumberOfCampaigns = True 286 | campaignsList = auxTwitterCampaignsList[:20] 287 | else: 288 | excededNumberOfCampaigns = False 289 | campaignsList = auxTwitterCampaignsList 290 | campaigns = ','.join(campaignsList) 291 | 292 | # with 20 campaigns or fewer the if clause is skipped and this call just fetches the data directly 293 | twitterReportsDataFrame = pandas.concat([twitterReportsDataFrame, self.recursiveextractor(dateStart, 294 | dateEnd, campaigns, placement)]) 295 | 296 | # remember our dataframe containing ids and campaign names? it's used here to get the names of the campaigns. 
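# The inner merge below keeps only rows whose 'id' shows up in both frames, which
# attaches the campaign name to every stats row and drops stats with no matching campaign.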
297 | twitterreport = pandas.merge(twitterReportsDataFrame, 298 | twitterCampaigns, how='inner', left_on='id', right_on='id') 299 | 300 | 301 | twitter = pandas.concat([twitter, twitterreport]) 302 | twitter['date'] = pandas.to_datetime(pandas.Series(twitter['date']), format="%Y-%m-%d") 303 | twitter = twitter.sort_values(by='date') 304 | return twitter 305 | 306 | 307 | # class to make Adjust API calls 308 | class Adjust: 309 | 310 | # this is to group the data, but as stated above the data already comes grouped 311 | ''' 312 | groupingCategorizer = [ 313 | ('date', ['day', 'hour', 'week', 'month']), 314 | ('tracker_name', ['trackers']), 315 | ('network', ['networks']), 316 | ('campaign', ['campaigns']), 317 | ('adgroup', ['adgroups']), 318 | ('creative', ['creatives']), 319 | ('country', ['countries']), 320 | ('device_type', ['device_types']), 321 | ('region', ['region']), 322 | ('os_name', ['os_names']) 323 | ] 324 | ''' 325 | def reports(self, dateStart, dateEnd, appid, grouping, eventKpis, kpis=None, trackerFilter=None): 326 | eventKpisList = [] 327 | 328 | 329 | for data in eventKpis: 330 | eventKpisList.append(data) 331 | 332 | 333 | kpis = ['installs'] if kpis is None else kpis 334 | trackerFilter = [''] if trackerFilter is None else trackerFilter 335 | 336 | trackerFilter = ','.join(trackerFilter) 337 | kpis = ','.join(kpis) 338 | eventKpisList = ','.join(eventKpisList) 339 | groupingList = ','.join(grouping) 340 | content = requests.get('https://api.adjust.com/kpis/v1/' + appid + '.csv', 341 | params={ 342 | 'start_date': dateStart, 343 | 'end_date': dateEnd, 344 | 'kpis': kpis, 345 | 'event_kpis': eventKpisList, 346 | 'user_token': 'user_token', 347 | 'grouping': groupingList, 348 | 'tracker_filter': trackerFilter 349 | }).content 350 | data = pandas.read_csv(io.StringIO(content.decode('utf-8'))) 351 | data = data.rename(columns=eventKpis) 352 | data.drop(['tracker_token'], inplace=True, axis=1) 353 | data['date'] = pandas.to_datetime(pandas.Series(data['date']), format="%Y-%m-%d") 354 | data = data.sort_values(by='date') 355 | 356 | return data 357 | 358 | 359 | # class to make AppsFlyer API calls 360 | class AppsFlyer: 361 | def reports(self, dateStart, dateEnd, appNames, callTypes, fields=None, eventName=None, countApp=0, countCall=0, data=pandas.DataFrame(), saveEndDate=None): 362 | 363 | print("[Appsflyer] - Extracting appsflyer data for app_name=%s and call_type=%s" % 364 | (appNames[countApp], callTypes[countCall])) 365 | 366 | # this initializes every variable passed 367 | eventNameList = ','.join(eventName) if eventName is not None else '' 368 | fieldsList = ','.join(fields) if fields is not None else '' 369 | field = 'fields' if fields else '' 370 | events = 'event_name' if 'in_app' in callTypes[countCall] else '' 371 | dataFromCall = pandas.DataFrame() 372 | # the call gets made and stored in contentappsflyer 373 | try: 374 | contentappsflyer = requests.get('https://hq.appsflyer.com/export/' + appNames[countApp] + '/' + callTypes[countCall] + '/v5', 375 | params={ 376 | 'api_token': appsflyerinit(), 377 | 'timezone': '-03:00', 378 | 'from': dateStart, 379 | 'to': dateEnd, 380 | field: fieldsList, 381 | events: eventNameList 382 | }).content 383 | # transform the received data into a dataframe 384 | dataFromCall = pandas.read_csv(io.StringIO(contentappsflyer.decode('utf-8'))) 385 | if callTypes[countCall] in ['installs_report', 'in_app_events_report', 'uninstall_events_report', 'organic_installs_report', 'organic_in_app_events_report']: 386 | dataFromCall['Media Source'] = dataFromCall['Media Source'].fillna('no_channel')
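# NOTE (hedged): AppsFlyer's raw-data Pull API caps how many rows one export returns
# (this code assumes roughly 200,000 per call), which is why reports() recurses below
# with a walked-back dateEnd whenever a response comes back at the cap.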
387 | # some classification processes: 388 | # first, organic raw data gets flagged with fixed values; 389 | # second, we tag which operating system the app id belongs to 390 | if 'organic' in callTypes[countCall]: 391 | dataFromCall['Media Source'] = 'Organic' 392 | dataFromCall['Campaign'] = '0' 393 | dataFromCall['Adset'] = '0' 394 | if 'com.' in appNames[countApp]: 395 | dataFromCall['os_name'] = 'android' 396 | else: 397 | dataFromCall['os_name'] = 'ios' 398 | except Exception: 399 | return self.reports(dateStart=dateStart, dateEnd=dateEnd, appNames=appNames, callTypes=callTypes, fields=fields, 400 | eventName=eventName, countApp=countApp, 401 | countCall=countCall, 402 | data=data, saveEndDate=saveEndDate) 403 | # The func has two counters in it so it can iterate over every app name and call type. 404 | # Reaching here means the data retrieved is complete, so we go on with the loops; there are 2 loops in this func, first run one then the other 405 | if len(dataFromCall.index) < 200000: 406 | # all app names for this call type are done 407 | if len(appNames) == countApp+1: 408 | # all call types are done 409 | if len(callTypes) == countCall+1: 410 | 411 | data = pandas.concat([data, dataFromCall]) 412 | # now we return the data and get out of the func 413 | return data 414 | else: 415 | # If saveEndDate exists it replaces the dateEnd parameter and is set back to None so the run can go on normally 416 | if saveEndDate: 417 | dateEnd = saveEndDate 418 | saveEndDate = None 419 | else: 420 | pass 421 | # The call counter drives the second loop: on every new iteration the appNames counter restarts and the calls counter is incremented by 1 422 | data = pandas.concat([data, dataFromCall]) 423 | return self.reports(dateStart, dateEnd, appNames, callTypes, fields, eventName, countApp=0, 424 | countCall=countCall+1, 425 | data=data, saveEndDate=saveEndDate) 426 | else: 427 | # If saveEndDate exists it replaces the dateEnd parameter and is set back to None so the run can go on normally 428 | if saveEndDate: 429 | dateEnd = saveEndDate 430 | saveEndDate = None 431 | else: 432 | pass 433 | data = pandas.concat([data, dataFromCall]) 434 | # The app counter drives the first loop: on every new iteration the calls counter stays the same and the app counter is incremented by 1 435 | return self.reports(dateStart, dateEnd, appNames, callTypes, fields=fields, eventName=eventName, countApp=countApp+1, countCall=countCall, data=data, saveEndDate=saveEndDate) 436 | else: 437 | # Where the magic happens, and why this is a recursive func: 438 | # if the dataframe hit the 200,000-row cap it runs again without changing the counters. 439 | # Also, the dateEnd parameter gets updated with the time of the last row of the dataframe 440 | saveEndDate = dateEnd if saveEndDate is None else saveEndDate 441 | dateEnd = dataFromCall['Event Time' if fields and 'event_time' in fields else 'Install Time'].iloc[-1][:-3] 442 | 443 | # The time comes as yyyy-mm-dd hh:mm:ss, so ':ss' is cut off the string to update the dateEnd parameter 444 | data = pandas.concat([data, dataFromCall]) 445 | return self.reports(dateStart=dateStart, dateEnd=dateEnd, appNames=appNames, callTypes=callTypes, fields=fields, eventName=eventName, countApp=countApp, 446 | countCall=countCall, 447 | data=data, saveEndDate=saveEndDate) 448 | 449 | -------------------------------------------------------------------------------- /utils/drive.py: -------------------------------------------------------------------------------- 1 | from utils.paths import all_paths_tabelas, all_paths_relatorios 2 | from init.init import googledriveinit 3 | from pydrive.drive import GoogleDrive
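# NOTE (hedged): per the README the tables are synced automatically when the program starts;
# the intended flow is load_files() before a run to refresh staticfiles/tables from Drive,
# and upload_files() afterwards to push the generated staticfiles/reports back up.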
4 | import pandas 5 | import os 6 | 7 | 8 | def load_files(): 9 | files_to_load = [f for f in os.listdir(all_paths_tabelas) if not f.startswith('.')] 10 | 11 | drive = GoogleDrive(googledriveinit()) 12 | 13 | file_list = drive.ListFile({'q': "'folder_id' in parents and trashed=false"}).GetList() 14 | for file1 in file_list: 15 | print('[Google Drive] - File to load: %s, id: %s' % (file1['title'], file1['id'])) 16 | if file1['title'] in files_to_load: 17 | file = drive.CreateFile({'id': file1['id']}) 18 | file.GetContentFile(all_paths_tabelas + file1['title']) 19 | 20 | 21 | def upload_files(): 22 | files_to_upload = [f for f in os.listdir(all_paths_relatorios) if not f.startswith('.')] 23 | 24 | drive = GoogleDrive(googledriveinit()) 25 | 26 | file_list = drive.ListFile({'q': "'folder_id' in parents and trashed=false"}).GetList() 27 | for file1 in file_list: 28 | print('[Google Drive] - File to upload: %s, id: %s' % (file1['title'], file1['id'])) 29 | if file1['title'] in files_to_upload: 30 | file = drive.CreateFile({'id': file1['id']}) 31 | file.SetContentFile(all_paths_relatorios + file1['title']) 32 | file['title'] = file1['title'] 33 | file.Upload() 34 | -------------------------------------------------------------------------------- /utils/paths.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # This step is for getting the paths to the folders. There are 2 aux folders: 4 | # one is called Tabelas (tables), which holds auxiliary tables; the other is Relatorios (reports), 5 | # where all the generated csvs are written 6 | 7 | 8 | # This is the main path; the others are static and don't change 9 | 10 | path = '/'.join(os.path.realpath(__file__).replace('\\', '/').split('/')[:-2]) 11 | 12 | ''' 13 | ############################################ 14 | ### !!!!! DON'T CHANGE !!!!! ### 15 | ############################################ 16 | ''' 17 | 
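# Example (illustrative paths): if this file lives at
# /home/user/performance_reports/utils/paths.py, then path resolves to
# '/home/user/performance_reports', all_paths_tabelas to
# '/home/user/performance_reports/staticfiles/tables/' and all_paths_relatorios
# to '/home/user/performance_reports/staticfiles/reports/'.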
18 | # These are the paths to write and read reports or tables 19 | all_paths_tabelas = path + r'/staticfiles/tables/' 20 | all_paths_relatorios = path + r'/staticfiles/reports/' 21 | 22 | ''' 23 | ############################################ 24 | ### READ ### 25 | ############################################ 26 | ''' 27 | 28 | # PAX OLD APP 29 | PATH_CUSTOS_ADJUST = all_paths_tabelas + 'custos_adjust.csv' 30 | PATH_CUSTOS_GMAPS = all_paths_tabelas + 'custos_gmaps.csv' 31 | 32 | # DRIVERS 33 | PATH_CUSTOS_ADJUST_DRIVERS = all_paths_tabelas + 'custos_adjust_drivers.csv' 34 | PATH_TO_DRIVER_CHANNEL_EQUIVALENCE = all_paths_tabelas + 'source_and_campaigns_by_driverchanelid.csv' 35 | 36 | # PAX AND DRIVERS 37 | PATH_CUSTOS_APPSFLYER = all_paths_tabelas + 'custos_appsflyer_newapp.xlsx' 38 | PATH_DEFINE_VALUES = all_paths_tabelas + 'media_and_source_by_source_and_campaign_names.csv' 39 | # PAX 40 | PATH_SIGLAS_PRACAS = all_paths_tabelas + 'siglas_pracas.csv' 41 | 42 | 43 | ''' 44 | ############################################ 45 | ### WRITE ### 46 | ############################################ 47 | ''' 48 | 49 | # AUTOMATICALLY 50 | PATH_TO_CUSTOS_ADJUST = all_paths_tabelas + 'custos_adjust.csv' 51 | PATH_TO_CUSTOS_ADJUST_DRIVERS = all_paths_tabelas + 'custos_adjust_drivers.csv' 52 | 53 | # MANUAL 54 | PATH_TO_RELATORIOS = all_paths_relatorios -------------------------------------------------------------------------------- /utils/queries.py: -------------------------------------------------------------------------------- 1 | import utils.time 2 | 3 | import datetime 4 | 5 | # Only Queries 6 | 7 | ''' 8 | ############################################ 9 | ### FUNCTIONS ### 10 | ############################################ 11 | ''' 12 | datestart = datetime.datetime.strptime(utils.time.startdate, "%Y-%m-%d") - datetime.timedelta(days=30) 13 | datestart = str(datestart).partition(" ")[0] 14 | 15 | # EXAMPLE 16 | ''' 17 | ############################################ 18 | ### DRIVERS ### 19 | ############################################ 20 | ''' 21 | # BASE OLD APP 22 | QUERY_DRIVER_FIRST_TRIP = """SELECT * 23 | WHERE date_value BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 24 | GROUP BY 1, 2, 3, 4, 5, 6 25 | ORDER BY 1, 2""" 26 | QUERY_DRIVER_NEW_REGULAR = """SELECT * 27 | WHERE date_value BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 28 | GROUP BY 1, 2, 3, 4, 5, 6 29 | ORDER BY 1, 2""" 30 | QUERY_DRIVER_SIGN_UP = """SELECT * 31 | WHERE date_value BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 32 | GROUP BY 1, 2, 3, 4, 5, 6 33 | ORDER BY 1, 2""" 34 | 35 | # COHORT OLD APP 36 | QUERY_DRIVER_COHORT = """ 37 | SELECT 38 | where to_char((install_date::DATE),'yyyy-MM-dd') BETWEEN '""" + datestart + """' AND '""" + utils.time.enddate + """' 39 | and activity_kind = 'install' 40 | group by 1,2,3 41 | order by 1,2,3""" 42 | 43 | # BASE NEW APP 44 | QUERY_DRIVER_DFT_GMV_NEWAPP = """ 45 | SELECT * 46 | WHERE (to_date((ddb.reg_time), 'YYYY-MM-DD HH24:MI:SS') - INTERVAL '11 hours') :: DATE BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 47 | GROUP BY 1, 2, 3, 4, 5 48 | ORDER BY 1 49 | """ 50 | 51 | QUERY_DRIVER_NEW_REGULAR_NEWAPP = """ 52 | SELECT * 53 | WHERE (to_date((ddb.work_time), 'YYYY-MM-DD HH24:MI:SS') - 54 | INTERVAL '11 hours') :: DATE BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 55 | GROUP BY 1, 2, 3, 4, 5 56 | ORDER BY 1 57 | """ 58 | 
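# NOTE (hedged): these query strings are built by plain concatenation because the
# BETWEEN bounds come from utils.time at import time, not from user input; with
# untrusted input you'd bind parameters through the DB driver instead, e.g.
#   cursor.execute("... WHERE date_value BETWEEN %s AND %s", (startdate, enddate))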
59 | QUERY_DRIVER_APPSFLYER_INSTALLS = """SELECT * 60 | WHERE app_id IN ('apps_drivers') and CONVERT_TIMEZONE('GMT', 'America/Sao_Paulo',install_time::timestamp)::DATE BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 61 | GROUP BY 1,2,3,4,5 62 | ORDER BY 1,2""" 63 | 64 | ''' 65 | ############################################ 66 | ### PAX ### 67 | ############################################ 68 | ''' 69 | 70 | # NEW APP 71 | QUERY_PAX_NEWAPP = """ 72 | SELECT *""" 73 | 74 | QUERY_PAX_APPSFLYER_INSTALLS = """SELECT * 75 | WHERE app_id IN ('apps_pax') and install_time::DATE BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 76 | GROUP BY 1,2,3,4,5,6 77 | ORDER BY 1,2""" 78 | 79 | 80 | TESTE_QUERY_PAX_MAU_REGIAO = """SELECT * 81 | WHERE r.call_date BETWEEN '""" + utils.time.startdate + """ 00:00:00' AND '""" + utils.time.enddate + """ 23:59:59' 82 | GROUP BY 1, 2, 3, 4, 5 83 | ORDER BY 1, 2""" 84 | -------------------------------------------------------------------------------- /utils/time.py: -------------------------------------------------------------------------------- 1 | import time 2 | import datetime 3 | 4 | # This is only to get the current time. We're only looking for reports inside the current month. 5 | # dateStart is always day 1 of the month we are currently in; the only exception is when today is day 1, in which case we close the previous month. 6 | # dateEnd is always today minus 1 day 7 | # the xxxxdate() helpers return dates as yyyy-MM-dd (used in adjust, facebook and some queries on our database) 8 | # the xxxxdatebase() helpers return dates as yyyyMMdd (used in google and some of our database queries) 9 | 10 | 11 | def datestart(currentDate = None): 12 | # if today is the first day of the month we get the first day of the previous month instead. 13 | 14 | if currentDate: 15 | if currentDate.day == 1: 16 | today = '01' 17 | month = currentDate.month - 1 18 | year = currentDate.year 19 | if month == 0: month, year = 12, year - 1  # on January 1st, roll back to December of last year 20 | if month < 10: 21 | return str(year) + '-' + '0' + str(month) + '-' + today 22 | else: 23 | return str(year) + '-' + str(month) + '-' + today 24 | else: 25 | today = '01' 26 | currdate = str(currentDate.year) + '-' + (str(currentDate.month) if currentDate.month > 9 else '0' + str(currentDate.month)) 27 | return currdate + '-' + today 28 | else: 29 | if time.strftime("%d") == '01': 30 | today = '01' 31 | month = int(time.strftime('%m')) - 1 32 | year = int(time.strftime('%Y')) 33 | if month == 0: month, year = 12, year - 1  # same January rollover as above 34 | if month < 10: 35 | return str(year) + '-' + '0' + str(month) + '-' + today 36 | else: 37 | return str(year) + '-' + str(month) + '-' + today 38 | else: 39 | today = '01' 40 | currdate = time.strftime("%Y-%m") 41 | return currdate + '-' + str(today) 42 | 43 | 44 | def dateend(): 45 | datetoend = str(datetime.datetime.now() - datetime.timedelta(days=1)).partition(" ")[0] 46 | return datetoend 47 | 48 | 49 | def datetobase(date): 50 | return date.replace('-', '') 51 | 52 | 53 | startdate = datestart() 54 | startdatebase = datetobase(startdate) 55 | enddate = dateend() 56 | enddatebase = datetobase(enddate) 57 | --------------------------------------------------------------------------------
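A quick sanity check of the date helpers in utils/time.py (illustrative sketch; the example dates, and running from the repo root, are assumptions):
```
import datetime
from utils import time as t

# Mid-month: the window opens on day 1 of the current month.
print(t.datestart(datetime.date(2018, 5, 15)))  # 2018-05-01
# On day 1 we close the previous month instead.
print(t.datestart(datetime.date(2018, 5, 1)))   # 2018-04-01
# January 1st rolls back to December of the previous year.
print(t.datestart(datetime.date(2019, 1, 1)))   # 2018-12-01
# datetobase() strips the dashes for the yyyyMMdd variants.
print(t.datetobase('2018-05-01'))               # 20180501
```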