├── .gitignore
├── LICENSE
├── README.md
├── client_secrets.json
├── credentials.json
├── drivers
│   ├── __init__.py
│   ├── appsflyer.py
│   ├── database.py
│   ├── facebook.py
│   ├── google.py
│   └── performance.py
├── init
│   ├── __init__.py
│   └── init.py
├── main.py
├── pax
│   ├── __init__.py
│   ├── appsflyer.py
│   ├── database.py
│   ├── facebook.py
│   ├── google.py
│   ├── performance.py
│   └── twitter.py
├── requirements.txt
├── staticfiles
│   ├── reports
│   │   ├── performancereportdrivers.csv
│   │   └── performancereportnewapp.csv
│   └── tables
│       ├── custos_appsflyer_newapp.xlsx
│       ├── custos_gmaps.csv
│       ├── media_and_source_by_source_and_campaign_names.csv
│       ├── siglas_pracas.csv
│       └── source_and_campaigns_by_driverchanelid.csv
└── utils
    ├── __init__.py
    ├── apicalls.py
    ├── drive.py
    ├── paths.py
    ├── queries.py
    └── time.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | #IntelliJ
10 | .idea/
11 | 
12 | # macOS
13 | .DS_Store
14 | 
15 | # Distribution / packaging
16 | .Python
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 | 
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 | 
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 | 
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | .hypothesis/
54 | .pytest_cache/
55 | 
56 | # Translations
57 | *.mo
58 | *.pot
59 | 
60 | # Django stuff:
61 | *.log
62 | local_settings.py
63 | db.sqlite3
64 | 
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 | 
69 | # Scrapy stuff:
70 | .scrapy
71 | 
72 | # Sphinx documentation
73 | docs/_build/
74 | 
75 | # PyBuilder
76 | target/
77 | 
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 | 
81 | # pyenv
82 | .python-version
83 | 
84 | # celery beat schedule file
85 | celerybeat-schedule
86 | 
87 | # SageMath parsed files
88 | *.sage.py
89 | 
90 | # Environments
91 | .env
92 | .venv
93 | env/
94 | venv/
95 | ENV/
96 | env.bak/
97 | venv.bak/
98 | 
99 | # Spyder project settings
100 | .spyderproject
101 | .spyproject
102 | 
103 | # Rope project settings
104 | .ropeproject
105 | 
106 | # mkdocs documentation
107 | /site
108 | 
109 | # mypy
110 | .mypy_cache/
111 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2018 Nicolas Leal
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Performance Reports
2 | This was one of the first programs I made for 99. It gets data from day 1 of the current month to D-1 (yesterday).
3 | It is a Python program that extracts locally, as .csv files, all the data the marketing performance team needs from all the platforms:
4 | Google, Facebook, Twitter, AppsFlyer and Adjust (we are currently not collecting Adjust data anymore).
5 | 
6 | ## Initialize
7 | You can change the initialization of the APIs and set credentials and other credential-related settings in init/init.py.
8 | ```
9 | pip install -r /path/to/performance_reports/requirements.txt
10 | ```
11 | 
12 | ## Utils
13 | + ***apicalls.py*** - classes to retrieve data from the services' APIs.
14 | + ***drive.py*** - sends data to and receives data from Google Drive.
15 | + ***paths.py*** - gets the paths for reports and tables.
16 | + ***queries.py*** - holds the queries that fetch data from the database as a dataframe (if a query changes, don't forget
17 | to update the dataframes' column names).
18 | + ***time.py*** - gets the start_date (since when) and end_date (until when) parameters.
19 | 
20 | ## Run
21 | Easy peasy
22 | ```
23 | python main.py
24 | ```
25 | As you might guess, the ***pax*** folder retrieves and runs ETL on passenger data; ***drivers*** does the same for driver data.
26 | 
27 | ## StaticFiles
28 | There are 2 folders: ***tables*** and ***reports***.
29 | ### Tables
30 | Holds secondary data used by the program; it automatically downloads and updates your local files from Google Drive
31 | once you start the program.
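
As a reference for what that sync looks like, here is a minimal sketch of downloading a Drive folder with PyDrive (the library pinned in requirements.txt). It is only an illustration: the folder id, target directory and function name are hypothetical, not the ones utils/drive.py actually uses.

```
# minimal PyDrive download sketch -- folder_id and target_dir are hypothetical
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

def download_tables(folder_id='HYPOTHETICAL_FOLDER_ID', target_dir='staticfiles/tables/'):
    gauth = GoogleAuth()
    gauth.LocalWebserverAuth()  # init/init.py does a more careful cached-credentials dance
    drive = GoogleDrive(gauth)

    # list every file inside the Drive folder and overwrite the stale local copy
    for drive_file in drive.ListFile({'q': "'%s' in parents and trashed=false" % folder_id}).GetList():
        drive_file.GetContentFile(target_dir + drive_file['title'])
```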
32 | 
33 | ### Reports
34 | Holds the reports generated by the program; they are automatically uploaded to Google Drive once the program finishes running.
35 | 
36 | 
--------------------------------------------------------------------------------
/client_secrets.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/client_secrets.json
--------------------------------------------------------------------------------
/credentials.json:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/credentials.json
--------------------------------------------------------------------------------
/drivers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/drivers/__init__.py
--------------------------------------------------------------------------------
/drivers/appsflyer.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import logging
3 | import pandas
4 | import requests
5 | import datetime
6 | import utils.time
7 | from utils.paths import PATH_CUSTOS_APPSFLYER, PATH_DEFINE_VALUES
8 | from utils.queries import QUERY_DRIVER_APPSFLYER_INSTALLS
9 | from utils.apicalls import GoogleAds, AppsFlyer
10 | from init.init import databseinit
11 | 
12 | 
13 | def appsflyerData():
14 |     appsflyer_data = pandas.DataFrame()
15 |     define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';')
16 |     custos = pandas.read_excel(PATH_CUSTOS_APPSFLYER,
17 |                                delimiter=';', keep_default_na=False,
18 |                                na_values=['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A', 'N/A', '#NA', 'NULL',
19 |                                           'NaN', '-NaN', 'nan', '-nan'])
20 |     custos = custos.replace(r'', numpy.nan, regex=True)
21 | 
22 |     # format dataframes
23 |     custos['End Date'] = pandas.to_datetime(pandas.Series(custos['End Date']), format="%d/%m/%Y")
24 |     custos['Start Date'] = pandas.to_datetime(pandas.Series(custos['Start Date']), format="%d/%m/%Y")
25 | 
26 |     # config startdate and enddate and create a list of dates
27 |     start = datetime.datetime.strptime(utils.time.startdate, "%Y-%m-%d")
28 |     end = datetime.datetime.strptime(utils.time.enddate, "%Y-%m-%d")
29 |     date_list = [start + datetime.timedelta(days=x) for x in range(0, (end - start).days + 1)]
30 |     custos = custos[(custos['End Date'] > start) | (custos['End Date'].isnull())]
31 | 
32 |     appsflyer = AppsFlyer()
33 |     appsflyer = appsflyer.reports(utils.time.startdate, utils.time.enddate,
34 |                                   ['app_driver_android', 'app_driver_ios'],
35 |                                   ['in_app_events_report', 'organic_in_app_events_report'],
36 |                                   ['event_time', 'media_source', 'campaign', 'af_adset', 'event_name'],
37 |                                   ['app_driver_event'])
38 |     appsflyer['Campaign'] = appsflyer['Campaign'].fillna('None')
39 |     appsflyer['Adset'] = appsflyer['Adset'].fillna('None')
40 |     appsflyer['Media Source'] = appsflyer['Media Source'].fillna('NA')
41 |     appsflyer['enviou_todos_docs'] = 1
42 | 
43 |     appsflyer.drop(['Event Name'], inplace=True, axis=1)
44 |     appsflyer['Event Time'] = appsflyer['Event Time'].apply(lambda x: x.split(' ')[0])
45 |     appsflyer = appsflyer.groupby(['Event Time', 'Media Source', 'Campaign', 'Adset', 'os_name']).sum()
46 |     appsflyer = appsflyer.reset_index()
47 |     appsflyer
= appsflyer.rename(columns={ 48 | 'Event Time': 'date', 49 | 'Media Source': 'source', 50 | 'Campaign': 'campaign', 51 | 'Adset': 'adgroup' 52 | }) 53 | 54 | appsflyer = pandas.concat([appsflyer, retrieveAppsflyerInstalls()]) 55 | 56 | google = GoogleAds() 57 | google = google.reportcampaigns() 58 | google.rename( 59 | columns={ 60 | 'Campaign': 'campaign_name', 61 | 'Campaign ID': 'campaign' 62 | }, inplace=True) 63 | 64 | google = google.drop_duplicates(subset=['campaign']) 65 | 66 | appsflyer = pandas.merge(appsflyer, google, how='left', left_on='campaign', right_on='campaign', indicator=True) 67 | appsflyer['campaign'][appsflyer['_merge'] == 'both'] = appsflyer['campaign_name'] 68 | appsflyer.drop(['campaign_name', '_merge'], inplace=True, axis=1) 69 | 70 | 71 | appsflyer['campaign'] = appsflyer['campaign'].fillna('None') 72 | appsflyer['source'] = appsflyer['source'].fillna('NA') 73 | appsflyer['tool'] = 'AppsFlyer' 74 | appsflyer['midia'] = 'nonpaid' 75 | define_values = define_values[define_values['app'] == 'driver'] 76 | for i in ['source', 'midia']: 77 | define_aux = define_values[~define_values[i].isna()] 78 | for rows in [tuple(x) for x in define_aux.values]: 79 | if str(rows[2]) == 'nan': 80 | appsflyer[i][appsflyer['campaign'].str.contains(rows[3], na=False)] = rows[ 81 | 1 if i == 'source' else 0] 82 | else: 83 | appsflyer[i][appsflyer['source'].str.contains(rows[2], na=False)] = rows[ 84 | 1 if i == 'source' else 0] 85 | 86 | appsflyer['campaign'][appsflyer['source'].str.contains('Driver_Acq', na=False)] = \ 87 | appsflyer[ 88 | 'source'] 89 | 90 | appsflyer = appsflyer[['date','midia', 'tool', 'source', 'os_name', 'campaign', 'adgroup', 'installs', 'enviou_todos_docs']] 91 | 92 | appsflyer['date'] = pandas.to_datetime(pandas.Series(appsflyer['date']), format="%Y-%m-%d") 93 | for date in date_list: 94 | # concat everything on the go 95 | appsflyer_data = pandas.concat([appsflyer_data, 96 | pandas.merge( 97 | (appsflyer[appsflyer['date'] == date]), 98 | (custos[((custos['End Date'] >= date) | (custos['End Date'].isnull())) & ( 99 | custos['Start Date'] <= date)]), 100 | how='left', 101 | left_on='campaign', 102 | right_on='campaign')]) 103 | 104 | appsflyer_data['amount_spent'] = 0 105 | appsflyer_data['amount_spent'][appsflyer_data['type'] == 'cpi'] = appsflyer_data['installs'].astype(float) * appsflyer_data['payout'].astype(float) 106 | appsflyer_data['amount_spent'][appsflyer_data['type'] == 'cpl'] = appsflyer_data['enviou_todos_docs'].astype(float) * appsflyer_data['payout'].astype(float) 107 | 108 | appsflyer_data['amount_spent'] = appsflyer_data['amount_spent'].astype(str) 109 | appsflyer_data['amount_spent'][appsflyer_data['amount_spent'] == 'nan'] = '0.0' 110 | appsflyer_data['amount_spent'] = appsflyer_data['amount_spent'].apply(lambda x: str(x.replace('.', ','))) 111 | 112 | appsflyer_data['installs'] = appsflyer_data['installs'].fillna(0) 113 | appsflyer_data['criou_basic'] = 0 114 | appsflyer_data['criou_basic'] = appsflyer_data['installs']*0.36 115 | appsflyer_data['criou_basic'] = appsflyer_data['criou_basic'].apply(lambda x: round(x)) 116 | appsflyer_data['criou_basic'] = appsflyer_data['criou_basic'].astype(int) 117 | 118 | appsflyer_data.drop(['type', 'payout', 'Start Date', 'End Date'], inplace=True, axis=1) 119 | 120 | appsflyer_data = appsflyer_data.sort_values(by='date') 121 | 122 | logging.info("[DRIVER] Appsflyer Updated") 123 | print('Driver - Appsflyer Updated') 124 | 125 | return appsflyer_data.drop_duplicates().reset_index(drop=True) 126 | 127 | def 
retrieveAppsflyerInstalls():
128 |     conn = databseinit()
129 |     curNewUsers = conn.cursor()
130 |     curNewUsers.execute(QUERY_DRIVER_APPSFLYER_INSTALLS)
131 |     col_names = []
132 |     for x in curNewUsers.description:
133 |         col_names.append(x[0])
134 |     newusers = pandas.DataFrame(curNewUsers.fetchall(), columns=col_names)
135 |     return newusers
--------------------------------------------------------------------------------
/drivers/database.py:
--------------------------------------------------------------------------------
1 | from init.init import databseinit as databaseinit
2 | from utils.paths import PATH_TO_DRIVER_CHANNEL_EQUIVALENCE, PATH_DEFINE_VALUES
3 | from utils.queries import QUERY_DRIVER_SIGN_UP_NEWAPP, QUERY_DRIVER_NEW_REGULAR_NEWAPP, QUERY_DRIVER_DFT_NEWAPP
4 | from utils.apicalls import GoogleAds
5 | import unidecode
6 | import pandas
7 | import logging
8 | 
9 | 
10 | def BaseNewApp():
11 |     define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';')
12 | 
13 |     reports = pandas.DataFrame()
14 |     for query in [QUERY_DRIVER_NEW_REGULAR_NEWAPP, QUERY_DRIVER_DFT_NEWAPP, QUERY_DRIVER_SIGN_UP_NEWAPP]:
15 |         aux = runQuery(query)
16 | 
17 |         # first, read the lookup csv so it becomes a dataframe
18 |         driver_channels = pandas.read_csv(
19 |             PATH_TO_DRIVER_CHANNEL_EQUIVALENCE, sep=';')
20 | 
21 |         # then join the tables
22 |         aux = pandas.merge(aux, driver_channels, how='left', left_on='driver_channel', right_on='driver_channel',
23 |                            indicator=True)
24 | 
25 |         # here I first assign the "source_name" field to the "source" field when "driver_channel" exists in both tables.
26 |         # I do the same for the "campaign" field
27 |         aux['source'][aux['_merge'] == 'both'] = aux['source_name']
28 |         aux['campaign'][aux['_merge'] == 'both'] = aux['campaign_name']
29 |         aux.drop(['source_name', 'campaign_name', 'driver_channel', '_merge'], inplace=True, axis=1)
30 | 
31 | 
32 |         reports = pandas.concat([reports, aux])
33 | 
34 |     google = GoogleAds()
35 |     google = google.reportcampaigns()
36 |     google.rename(
37 |         columns={
38 |             'Campaign': 'campaign_name',
39 |             'Campaign ID': 'campaign'
40 |         }, inplace=True)
41 | 
42 |     google = google.drop_duplicates(subset=['campaign'])
43 | 
44 |     reports = pandas.merge(reports, google, how='left', left_on='campaign', right_on='campaign', indicator=True)
45 |     reports['campaign'][reports['_merge'] == 'both'] = reports['campaign_name']
46 |     reports.drop(['campaign_name', '_merge'], inplace=True, axis=1)
47 | 
48 |     reports['region'] = reports['region'].str.upper()
49 | 
50 |     reports['tool'] = 'BaseNewApp'
51 |     reports['midia'] = 'nonpaid'
52 |     define_values = define_values[define_values['app'] == 'driver']
53 |     for i in ['source', 'midia']:
54 |         define_aux = define_values[~define_values[i].isna()]
55 |         for rows in [tuple(x) for x in define_aux.values]:
56 |             if str(rows[2]) == 'nan':
57 |                 reports[i][reports['campaign'].str.contains(rows[3], na=False)] = rows[
58 |                     1 if i == 'source' else 0]
59 |             else:
60 |                 reports[i][reports['source'].str.contains(rows[2], na=False)] = rows[
61 |                     1 if i == 'source' else 0]
62 | 
63 |     reports['date'] = pandas.to_datetime(pandas.Series(reports['date']), format="%Y-%m-%d")
64 |     reports['week'] = reports['date'].dt.week
65 | 
66 |     reports['region'] = reports['region'].apply(lambda x: unidecode.unidecode(x))
67 | 
68 |     reports = reports[['date', 'week', 'tool', 'midia', 'source', 'campaign', 'signups', 'signups_with_migration', 'regulars', 'regulars_with_migration', 'dft', 'dft_with_migration', 'region']]
69 | 
70 |     logging.info("[DRIVER] Database Updated")
71 | print('Driver - Database Updated') 72 | return reports 73 | 74 | 75 | 76 | def runQuery(query): 77 | conn = databaseinit() 78 | cursor = conn.cursor() 79 | cursor.execute(query) 80 | col_names = [] 81 | for x in cursor.description: 82 | col_names.append(x[0]) 83 | data = pandas.DataFrame(cursor.fetchall(), columns=col_names) 84 | return data 85 | 86 | 87 | def no_channelDistribution(data, distribute_by, columns_to_group, filter_by = None, drop_in_distribution = None, secondDataframe = None): 88 | data = data.fillna('None') 89 | if filter_by: 90 | column = 0 91 | value = 0 92 | for columns, values in filter_by.items(): 93 | column = columns 94 | value = values 95 | onlyNA = data[(data[column] == value)] 96 | temponlyValidChannels = data[(data[column] != value)] 97 | if drop_in_distribution: 98 | drop_in_na = drop_in_distribution + [column] 99 | temponlyValidChannels.drop(drop_in_distribution, inplace=True, axis=1) 100 | onlyNA.drop(drop_in_na, inplace=True, axis=1) 101 | else: 102 | drop_in_na = ['campaign'] 103 | else: 104 | 105 | onlyNA = secondDataframe 106 | temponlyValidChannels = data 107 | drop_in_na = ['campaign'] 108 | 109 | onlyNA = onlyNA.groupby([x for x in columns_to_group if x not in drop_in_na]).sum() 110 | onlyNA = onlyNA.reset_index() 111 | temponlyValidChannels = temponlyValidChannels.groupby(columns_to_group).agg( 112 | {[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]: 'sum'}) 113 | temponlyValidChannels = temponlyValidChannels.groupby([x for x in columns_to_group if x not in distribute_by]).apply( 114 | lambda x: x / float(x.sum())) 115 | temponlyValidChannels = temponlyValidChannels.reset_index() 116 | 117 | temponlyValidChannels.rename(columns={[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]: [x for x in list(temponlyValidChannels) if x not in columns_to_group][0]+'_%'}, inplace=True) 118 | temponlyValidChannels = pandas.merge(onlyNA, temponlyValidChannels, how='left', 119 | left_on=[item for item in columns_to_group if item not in distribute_by], 120 | right_on=[item for item in columns_to_group if item not in distribute_by]) 121 | 122 | temponlyValidChannels[[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]] = round(temponlyValidChannels[[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]] * 123 | temponlyValidChannels[[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]+'_%']) 124 | temponlyValidChannels.drop([[x for x in list(temponlyValidChannels) if x not in columns_to_group][0]+'_%'], inplace=True, axis=1) 125 | 126 | return temponlyValidChannels 127 | -------------------------------------------------------------------------------- /drivers/facebook.py: -------------------------------------------------------------------------------- 1 | from utils.apicalls import FacebookAds 2 | import utils.time 3 | import logging 4 | 5 | 6 | def facebookdata(ad_accounts): 7 | facebook = FacebookAds() 8 | facebook = facebook.reports(date_start=utils.time.startdate, date_end=utils.time.enddate, ad_accounts=ad_accounts, extracted=list()) 9 | facebook['midia'] = 'paid' 10 | facebook['source'] = 'facebook' 11 | facebook['os_name'] = 'android' 12 | facebook['network'] = 'Facebook Ads' 13 | 14 | facebook['source'][facebook['adset_name'].str.contains('_INS')] = 'instagram' 15 | facebook['os_name'][facebook['campaign_name'].str.contains('IOS')] = 'ios' 16 | facebook['network'][facebook['adset_name'].str.contains('_INS')] = 'Instagram Installs' 17 | 18 | 
facebook.rename(columns={'date_start': 'date', 'campaign_name': 'campaign', 'adset_name': 'adgroup', 'ad_name': 'creative', 'spend':'amount_spent'}, inplace=True) 19 | facebook['tool'] = 'FaceAds' 20 | facebook = facebook[ 21 | ['date','tool','midia', 'source', 'os_name', 'network', 'campaign', 'adgroup', 'creative', 'amount_spent', 'impressions', 'clicks']] 22 | 23 | logging.info("[DRIVER] Facebook Updated") 24 | print('Driver - Facebook Updated') 25 | return facebook 26 | -------------------------------------------------------------------------------- /drivers/google.py: -------------------------------------------------------------------------------- 1 | from utils.apicalls import GoogleAds 2 | import utils.time 3 | from utils.paths import PATH_DEFINE_VALUES 4 | import pandas 5 | import logging 6 | 7 | 8 | def googlereports(customer_ids): 9 | reports = pandas.DataFrame() 10 | for customer_id in customer_ids: 11 | data = GoogleAds() 12 | data = data.reports(dateStart=utils.time.startdatebase, dateEnd=utils.time.enddatebase, customerId=customer_id) 13 | 14 | data.drop(['Clicks'], inplace=True, axis=1) 15 | # Column creation 16 | data['midia'] = 'paid' 17 | data['source'] = 'Google' 18 | data['network'] = 'Google Search - New Blue' 19 | data['os_name'] = 'android' 20 | 21 | # Convert values based on data on created tables 22 | 23 | define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';') 24 | define_values = define_values[(define_values['app'] == 'driver') & (~define_values['source'].isna()) & 25 | (~define_values['campaign contains'].isna())] 26 | 27 | for rows in [tuple(x) for x in define_values.values]: 28 | data['source'][data['Campaign'].str.contains(rows[3], na=False)] = rows[1] 29 | 30 | data['os_name'][data['Campaign'].str.contains('_iOS')] = 'ios' 31 | data.rename( 32 | columns={'Day': 'date', 33 | 'Campaign': 'campaign', 34 | 'Cost': 'amount_spent', 35 | 'Impressions': 'impressions', 36 | 'Interactions': 'clicks' 37 | }, inplace=True) 38 | data['tool'] = 'Adwords' 39 | reports = pandas.concat([reports, data]) 40 | 41 | reports = reports[['date','tool', 'midia', 'source', 'os_name', 42 | 'network', 'campaign', 'amount_spent', 'impressions', 'clicks']] 43 | 44 | logging.info("[DRIVER] Google Updated") 45 | print('Driver - Google Updated') 46 | return reports 47 | 48 | 49 | -------------------------------------------------------------------------------- /drivers/performance.py: -------------------------------------------------------------------------------- 1 | from utils.paths import PATH_SIGLAS_PRACAS 2 | from drivers.appsflyer import appsflyerData as appsflyerDriversData 3 | from drivers.facebook import facebookdata as facebookdrivers 4 | from drivers.google import googlereports as googledrivers 5 | from drivers.database import BaseNewApp as baseNewApp 6 | import pandas 7 | import unidecode 8 | 9 | 10 | def performanceDrivers(): 11 | performancedrivers = pandas.concat([ 12 | appsflyerDriversData(), 13 | facebookdrivers(['act_2013665502000466', 'act_1550209891679365']), 14 | googledrivers(['619-852-1756']), 15 | baseNewApp()]) 16 | 17 | performancedrivers['week'] = performancedrivers['date'].dt.week 18 | performancedrivers = performancedrivers[['date', 'week', 'tool', 'midia', 'source', 'os_name', 'campaign', 'adgroup', 'creative', 'installs', 'criou_basic', 'enviou_todos_docs', 'amount_spent', 'impressions', 'clicks', 'signups', 'signups_with_migration', 'regulars', 'regulars_with_migration', 'dft', 'dft_with_migration', 'region']] 19 | 20 | 
performancedrivers['campaign'][performancedrivers['source'].str.contains('Driver_Acq', na=False)] = \
21 |         performancedrivers['source']
22 |     performancedrivers['source'][performancedrivers['source'].str.contains('Driver_Acq', na=False)] = \
23 |         performancedrivers['source'].apply(
24 |             lambda x: str(x)[str(x).find('_', 8) + 1:str(x).find('_', 11)] if str(x).count('_') > 2 else 'landing'  # takes the third '_'-separated token (presumably the partner name) from 'Driver_Acq_<partner>_...' sources, else 'landing'
25 |         )
26 | 
27 |     if 'region' in performancedrivers.columns:
28 |         region = pandas.read_csv(PATH_SIGLAS_PRACAS, sep=';')
29 |         region['pracas'] = region['pracas'].str.upper()
30 |         listofregions = [tuple(x) for x in region.values]
31 |         performancedrivers['region'][performancedrivers['region'].isnull()] = 'BR'
32 |         for i in range(0, len(listofregions)):
33 |             performancedrivers['region'][performancedrivers['campaign'].str.contains(listofregions[i][0], na=False)] = \
34 |                 listofregions[i][1]
35 | 
36 |     performancedrivers['region'] = performancedrivers['region'].apply(lambda x: unidecode.unidecode(x) if x is not None else 'BR')
37 | 
38 |     performancedrivers['week'] = performancedrivers['date'].dt.week
39 |     return performancedrivers
40 | 
41 | 
42 | 
43 | 
--------------------------------------------------------------------------------
/init/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/init/__init__.py
--------------------------------------------------------------------------------
/init/init.py:
--------------------------------------------------------------------------------
1 | from googleads import adwords
2 | from facebook_business.api import FacebookAdsApi
3 | from requests_oauthlib import OAuth1
4 | from pydrive.auth import GoogleAuth
5 | import psycopg2
6 | import os
7 | 
8 | 
9 | # Initialization helpers, used to set up each external service
10 | def googledriveinit():
11 |     gauth = GoogleAuth()
12 |     gauth.LoadCredentialsFile('/'.join(os.path.realpath(__file__).replace('\\', '/').split('/')[:-2])
13 |                               + '/' + "credentials.json")
14 |     if gauth.credentials is None:
15 |         gauth.LocalWebserverAuth()
16 |     elif gauth.access_token_expired:
17 |         try:
18 |             gauth.Refresh()
19 |         except Exception as e:
20 |             print('[Exception] - Exception fired on Google Drive Init:' + str(e))
21 |             gauth.LocalWebserverAuth()
22 |     else:
23 |         gauth.Authorize()
24 | 
25 |     gauth.SaveCredentialsFile('/'.join(os.path.realpath(__file__).replace('\\', '/').split('/')[:-2])
26 |                               + '/' + "credentials.json")
27 |     return gauth
28 | 
29 | 
30 | # Initialize Twitter
31 | def twitterinit():
32 |     consumer_key = 'consumer_key'
33 |     consumer_secret = 'consumer_secret'
34 |     access_token = 'access_token'
35 |     access_token_secret = 'access_token_secret'
36 | 
37 |     auth = OAuth1(consumer_key, consumer_secret,
38 |                   access_token, access_token_secret)
39 |     return auth
40 | 
41 | 
42 | # Initialize Facebook
43 | def facebookinit():
44 |     my_app_id = 'app_id'
45 |     my_app_secret = 'app_secret'
46 |     my_access_token = 'access_token'
47 | 
48 |     FacebookAdsApi.init(my_app_id, my_app_secret, my_access_token)
49 | 
50 | 
51 | # Initialize Adwords
52 | # the client_customer_id is set to the drivers account
53 | def adwordsinit(customerId=None):
54 |     adwords_client_string='''adwords:
55 |     developer_token: developer_token
56 |     client_customer_id: client_customer_id
57 |     client_id: client_id
58 |     client_secret: client_secret
59 |     refresh_token: refresh_token'''
60 |     adwords_client = adwords.AdWordsClient.LoadFromString(adwords_client_string)
61 |     if
customerId:
62 |         adwords_client.SetClientCustomerId(customerId)
63 |     return adwords_client
64 | 
65 | 
66 | # Initialize Appsflyer
67 | def appsflyerinit():
68 |     return 'appsflyer_access_token'
69 | 
70 | 
71 | # Initialize Database
72 | def databseinit():
73 |     try:
74 |         conn = psycopg2.connect(dbname='db_name', user='user',
75 |                                 host='host', password='password',
76 |                                 port='port')
77 |         return conn
78 |     except psycopg2.Error as e:
79 |         print("I am unable to connect to the database: " + str(e))
80 |         return None
81 | 
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from drivers.performance import performanceDrivers as performancedrivers
2 | from pax.performance import performanceNewAppData as performanceNewApp
3 | from utils.paths import PATH_TO_RELATORIOS
4 | from utils.drive import load_files, upload_files
5 | import pandas
6 | import logging
7 | import warnings
8 | 
9 | warnings.filterwarnings("ignore")
10 | pandas.options.mode.chained_assignment = None
11 | 
12 | 
13 | load_files()
14 | logging.info("Secondary Files Downloaded")
15 | print('Secondary Files Downloaded')
16 | 
17 | performancedrivers().to_csv(PATH_TO_RELATORIOS + 'performancereportdrivers.csv', sep=',', float_format='%.0f', encoding='utf-8', index=False)
18 | logging.info("[DRIVER] Performance Report Generated")
19 | print('Driver - Performance Report Generated')
20 | 
21 | performanceNewApp().to_csv(PATH_TO_RELATORIOS + 'performancereportnewapp.csv', sep=',', float_format='%.0f', encoding='utf-8', index=False)
22 | logging.info("[PAX] Performance Report Generated")
23 | print('Pax - Performance Report Generated')
24 | 
25 | upload_files()
26 | logging.info("Performance Reports Files Uploaded")
27 | print('Performance Reports Files Uploaded')
--------------------------------------------------------------------------------
/pax/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/pax/__init__.py
--------------------------------------------------------------------------------
/pax/appsflyer.py:
--------------------------------------------------------------------------------
1 | import pandas
2 | import numpy
3 | import datetime
4 | import utils.time
5 | from utils.apicalls import AppsFlyer
6 | from utils.queries import QUERY_PAX_APPSFLYER_INSTALLS
7 | from init.init import databseinit
8 | from utils.paths import PATH_CUSTOS_APPSFLYER, PATH_DEFINE_VALUES, PATH_CUSTOS_GMAPS
9 | from utils.apicalls import GoogleAds
10 | import logging
11 | 
12 | def appsflyerData():
13 | 
14 |     # retrieves the AppsFlyer data and runs some ETL on it
15 | 
16 |     # first we have appsflyer_data, which holds AppsFlyer's data,
17 |     # then define_values, which is a dataframe used in the classification process.
18 |     # custos and custosgmaps define the costs for some campaigns; since costs for Google Maps and costs
19 |     # for other campaigns are calculated differently, we need two different files.
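    # For example (illustrative numbers only, not real payouts): a 'cpi' campaign with payout 2.0
    # and 100 installs is charged 100 * 2.0 further down, a 'cpa' campaign pays per first_trip,
    # and a 'cpg' (Google Maps) row is priced as sign_ups * cost_phone_activated + clicks * cost_click.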
20 |     appsflyer_data = pandas.DataFrame()
21 |     define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';')
22 |     custosgmaps = pandas.read_csv(PATH_CUSTOS_GMAPS, sep=';')
23 |     custos = pandas.read_excel(PATH_CUSTOS_APPSFLYER,
24 |                                delimiter=';', keep_default_na=False,
25 |                                na_values=['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A', 'N/A', '#NA', 'NULL',
26 |                                           'NaN', '-NaN', 'nan', '-nan'])
27 |     custos = custos.replace(r'', numpy.nan, regex=True)
28 | 
29 |     # format the custos dataframe dates, converting everything to datetime
30 |     custos['End Date'] = pandas.to_datetime(pandas.Series(custos['End Date']), format="%d/%m/%Y")
31 |     custos['Start Date'] = pandas.to_datetime(pandas.Series(custos['Start Date']), format="%d/%m/%Y")
32 | 
33 |     # config startdate and enddate and create a list of dates
34 |     start = datetime.datetime.strptime(utils.time.startdate, "%Y-%m-%d")
35 |     end = datetime.datetime.strptime(utils.time.enddate, "%Y-%m-%d")
36 |     date_list = [start + datetime.timedelta(days=x) for x in range(0, (end - start).days + 1)]
37 |     custos = custos[(custos['End Date'] > start) | (custos['End Date'].isnull())]
38 | 
39 |     # retrieve AppsFlyer installs from our database
40 |     appsflyer = retrieveAppsflyerInstalls()
41 |     appsflyer.rename(
42 |         columns={'install_time': 'date'}, inplace=True)
43 | 
44 |     # this dataframe passes daily_report; it is used for retrieving clicks
45 |     appsflyergmaps = AppsFlyer()
46 |     appsflyergmaps = appsflyergmaps.reports(utils.time.startdate, utils.time.enddate,
47 |                                             ['app_pax_android', 'app_pax_ios'],
48 |                                             ['daily_report'])
49 |     appsflyergmaps.rename(
50 |         columns={
51 |             'Date': 'date',
52 |             'Media Source (pid)': 'source',
53 |             'Campaign (c)': 'campaign',
54 |             'Clicks': 'clicks'
55 |         },
56 |         inplace=True)
57 |     appsflyergmaps['clicks'] = appsflyergmaps['clicks'].fillna(0)
58 |     appsflyergmaps.drop([x for x in list(appsflyergmaps) if x not in ['date', 'source', 'campaign', 'clicks']], inplace=True, axis=1)
59 | 
60 |     # I just need the clicks from the source "Google_Maps"
61 |     appsflyer = pandas.concat([appsflyer, appsflyergmaps[appsflyergmaps['source'] == 'Google_Maps']])
62 | 
63 |     # AppsFlyer events from the orange and degrade apps; the event is af_first_trip
64 |     appsflyerfirst = AppsFlyer()
65 |     appsflyerfirst = appsflyerfirst.reports(utils.time.startdate, utils.time.enddate,
66 |                                             ['app_pax_android', 'app_pax_ios'],
67 |                                             ['in_app_events_report', 'organic_in_app_events_report'],
68 |                                             ['event_time', 'media_source', 'campaign', 'event_name'],
69 |                                             ['app_pax_event'])
70 |     appsflyerfirst['first_trip'] = 1
71 |     appsflyerfirst.drop(['Event Name'], inplace=True, axis=1)
72 |     appsflyerfirst['Campaign'] = appsflyerfirst['Campaign'].fillna('None')
73 |     appsflyerfirst['Media Source'] = appsflyerfirst['Media Source'].fillna('NA')
74 |     appsflyerfirst['Event Time'] = appsflyerfirst['Event Time'].apply(lambda x: str(x).split(' ')[0])
75 |     appsflyerfirst = appsflyerfirst.groupby(['Event Time', 'Media Source', 'Campaign', 'os_name']).sum()
76 |     appsflyerfirst = appsflyerfirst.reset_index()
77 | 
78 |     appsflyerfirst.rename(
79 |         columns={
80 |             'Event Time': 'date',
81 |             'Media Source': 'source',
82 |             'Campaign': 'campaign'
83 |         }, inplace=True)
84 | 
85 | 
86 |     # AppsFlyer events from the orange and degrade apps; the event is af_sign_up
87 |     appsflyersignup = AppsFlyer()
88 |     appsflyersignup = appsflyersignup.reports(utils.time.startdate, utils.time.enddate,
89 |                                               ['app_pax_android', 'app_pax_ios'],
90 |                                               ['in_app_events_report', 'organic_in_app_events_report'],
91 |                                               ['event_time', 'media_source',
'campaign', 'event_name'], 92 | ['app_pax_event_2']) 93 | appsflyersignup['sign_ups'] = 1 94 | 95 | appsflyersignup.drop(['Event Name'], inplace=True, axis=1) 96 | appsflyersignup['Campaign'] = appsflyersignup['Campaign'].fillna('None') 97 | appsflyersignup['Media Source'] = appsflyersignup['Media Source'].fillna('NA') 98 | appsflyersignup['Event Time'] = appsflyersignup['Event Time'].apply(lambda x: str(x).split(' ')[0]) 99 | appsflyersignup = appsflyersignup.groupby(['Event Time', 'Media Source', 'Campaign', 'os_name']).sum() 100 | appsflyersignup = appsflyersignup.reset_index() 101 | 102 | appsflyersignup.rename( 103 | columns={ 104 | 'Event Time': 'date', 105 | 'Media Source': 'source', 106 | 'Campaign': 'campaign' 107 | }, inplace=True) 108 | 109 | appsflyerevents = pandas.concat([appsflyerfirst, appsflyersignup]) 110 | appsflyer = pandas.concat([appsflyerevents, appsflyer]) 111 | 112 | google = GoogleAds() 113 | google = google.reportcampaigns(customerId='771-742-8350') 114 | google.rename( 115 | columns={ 116 | 'Campaign': 'campaign_name', 117 | 'Campaign ID': 'campaign' 118 | }, inplace=True) 119 | 120 | google = google.drop_duplicates(subset=['campaign']) 121 | appsflyer = pandas.merge(appsflyer, google, how='left', left_on='campaign', right_on='campaign', indicator=True) 122 | appsflyer['campaign'][appsflyer['_merge'] == 'both'] = appsflyer['campaign_name'] 123 | appsflyer.drop(['campaign_name', '_merge'], inplace=True, axis=1) 124 | appsflyer['tool'] = 'AppsFlyer' 125 | 126 | appsflyer['midia'] = 'unpaid' 127 | define_values = define_values[define_values['app'] == 'pax'] 128 | for i in ['source', 'midia']: 129 | define_aux = define_values[~define_values[i].isna()] 130 | for rows in [tuple(x) for x in define_aux.values]: 131 | if str(rows[2]) == 'nan': 132 | appsflyer[i][appsflyer['campaign'].str.contains(rows[3], na=False)] = rows[ 133 | 1 if i == 'source' else 0] 134 | else: 135 | appsflyer[i][appsflyer['source'].str.contains(rows[2], na=False)] = rows[ 136 | 1 if i == 'source' else 0] 137 | 138 | appsflyer = appsflyer[['date','tool','midia','source','os_name','campaign','adgroup','creative','installs', 'first_trip', 'sign_ups', 'clicks']] 139 | 140 | appsflyer['date'] = pandas.to_datetime(pandas.Series(appsflyer['date']), format="%Y-%m-%d") 141 | for date in date_list: 142 | # concat everything on the go 143 | appsflyer_data = pandas.concat([appsflyer_data, 144 | pandas.merge( 145 | (appsflyer[appsflyer['date'] == date]), 146 | (custos[((custos['End Date'] >= date) | (custos['End Date'].isnull())) & ( 147 | custos['Start Date'] <= date)]), 148 | how='left', 149 | left_on='campaign', 150 | right_on='campaign')]) 151 | appsflyer_data = pandas.merge(appsflyer_data, custosgmaps, how='left', left_on='source', right_on='source') 152 | 153 | appsflyer_data['amount_spent'] = 0 154 | appsflyer_data['sign_ups'] = appsflyer_data['sign_ups'].fillna(0) 155 | appsflyer_data['clicks'] = appsflyer_data['clicks'].fillna(0) 156 | appsflyer_data['amount_spent'][appsflyer_data['typeg'] == 'cpg'] = round((appsflyer_data['sign_ups'].astype(float) * 157 | appsflyer_data['cost_phone_activated'].astype( 158 | float)) + ( 159 | appsflyer_data['clicks'].astype(float) * 160 | appsflyer_data['cost_click'].astype( 161 | float))) 162 | 163 | appsflyer_data['amount_spent'][appsflyer_data['type'] == 'cpi'] = appsflyer_data['installs'].astype(float) * appsflyer_data['payout'].astype(float) 164 | appsflyer_data['amount_spent'][appsflyer_data['type'] == 'cpa'] = appsflyer_data['first_trip'].astype(float) * 
appsflyer_data['payout'].astype(float) 165 | appsflyer_data.drop(['type', 'typeg'], inplace=True, axis=1) 166 | 167 | 168 | 169 | appsflyer_data['amount_spent'] = appsflyer_data['amount_spent'].astype(str) 170 | appsflyer_data['amount_spent'] = appsflyer_data['amount_spent'].apply(lambda x: str(x.replace('nan', '0.0'))) 171 | appsflyer_data['amount_spent'] = appsflyer_data['amount_spent'].apply(lambda x: str(x.replace('.', ','))) 172 | appsflyer_data.drop(['payout', 'Start Date', 'End Date', 'cost_phone_activated', 'cost_click', 'clicks'], inplace=True, axis=1) 173 | 174 | logging.info("[PAX] Appsflyer Updated") 175 | print('Pax - Appsflyer Updated') 176 | return appsflyer_data.drop_duplicates().reset_index(drop=True) 177 | 178 | 179 | def retrieveAppsflyerInstalls(): 180 | conn = databseinit() 181 | curNewUsers = conn.cursor() 182 | curNewUsers.execute(QUERY_PAX_APPSFLYER_INSTALLS) 183 | col_names = [] 184 | for x in curNewUsers.description: 185 | col_names.append(x[0]) 186 | newusers = pandas.DataFrame(curNewUsers.fetchall(), columns=col_names) 187 | return newusers 188 | -------------------------------------------------------------------------------- /pax/database.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | from init.init import databseinit as databaseinit 3 | from utils.time import startdate 4 | from utils.time import enddate 5 | from utils.queries import QUERY_PAX_NEWAPP 6 | from utils.paths import PATH_DEFINE_VALUES 7 | import numpy 8 | import logging 9 | 10 | 11 | def baseDatabase(): 12 | #reportnewApppaxData = pandas.read_csv(r'/Users/nicolasmelo1/Desktop/ToDab Corrected.csv', sep=',') 13 | define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';') 14 | reportnewApppaxData = extractDatabasePFTsNewApp() 15 | if reportnewApppaxData.empty: 16 | reports = reportnewApppaxData 17 | reports['date'] = None 18 | reports['tool'] = None 19 | reports['midia'] = None 20 | reports['source'] = None 21 | reports['pax'] = None 22 | reports['trips'] = None 23 | reports['burn'] = None 24 | reports['gmv'] = None 25 | reports['takerate'] = None 26 | reports['pft_pax'] = None 27 | reports['pft_trips'] = None 28 | reports['pft_burn'] = None 29 | reports['pft_gmv'] = None 30 | reports['pft_takerate'] = None 31 | reports['region'] = None 32 | return reports 33 | reportnewApppaxData.columns = ['call_date', 'channel', 'metropolitan_area_name', 'pax', 'trips', 'burn', 'gmv', 34 | 'takerate', 'call_date.1', 'channel.1', 'metropolitan_area_name.1', 'pft_pax', 35 | 'pft_trips', 'pft_burn', 'pft_gmv', 'pft_takerate'] 36 | 37 | reportsPFT = pandas.DataFrame() 38 | reportsPFT = reportsPFT.append(reportnewApppaxData) 39 | reportnewApppaxData.drop( 40 | ['call_date.1', 'channel.1', 'metropolitan_area_name.1', 'pft_pax', 'pft_trips', 'pft_burn', 'pft_gmv', 41 | 'pft_takerate'], inplace=True, axis=1) 42 | reportsPFT.drop(['call_date', 'channel', 'metropolitan_area_name', 'pax', 'trips', 'burn', 'gmv', 'takerate'], 43 | inplace=True, axis=1) 44 | 45 | reportsPFT.rename(columns={'call_date.1': 'call_date', 'channel.1': 'channel', 46 | 'metropolitan_area_name.1': 'metropolitan_area_name'}, inplace=True) 47 | 48 | reportsPFT = reportsPFT.dropna(axis=0, how='all') 49 | 50 | reports = pandas.concat([reportnewApppaxData, reportsPFT]) 51 | reports['channel'] = reports['channel'].replace(r'', numpy.nan, regex=True) 52 | reports = reports.replace(numpy.nan, 0, regex=True) 53 | reports['channel'][reports['channel'] == 0] = 'Glispa' 54 | 
reports['metropolitan_area_name'][reports['metropolitan_area_name'] == 0] = 'BR' 55 | reports['metropolitan_area_name'][reports['metropolitan_area_name'].str.contains('Campos')] = 'SAO JOSE DOS CAMPOS' 56 | reports['metropolitan_area_name'] = reports['metropolitan_area_name'].str.upper() 57 | 58 | reports.rename(columns={'channel': 'source'}, inplace=True) 59 | 60 | reports['source'][reports['source'].str.contains('google')] = 'Google' 61 | 62 | 63 | reports['tool'] = 'Base' 64 | reports['midia'] = 'unpaid' 65 | define_values = define_values[(define_values['app'] == 'pax') & (define_values['source'].isna()) & ( 66 | define_values['campaign contains'].isna())] 67 | for rows in [tuple(x) for x in define_values.values]: 68 | reports['midia'][reports['source'].str.contains(rows[2], na=False)] = rows[0] 69 | 70 | #reports['midia'][reports['campaign'].str.contains('Perf')] = 'paid' 71 | #reports['midia'][reports['campaign'].str.contains('Brand')] = 'paid' 72 | reports['midia'][reports['source'].str.contains('Organic')] = 'organic' 73 | 74 | reports['call_date'] = pandas.to_datetime(pandas.Series(reports['call_date']), format="%Y-%m-%d") 75 | #reports['week'] = reports['call_date'].dt.week 76 | 77 | reports.rename(columns={'call_date': 'date', 78 | 'metropolitan_area_name': 'region'}, inplace=True) 79 | reports = reports[ 80 | ['date', 'tool', 'midia', 'source', 'pax', 'trips', 'burn', 'gmv', 'takerate', 'pft_pax', 81 | 'pft_trips', 'pft_burn', 'pft_gmv', 'pft_takerate', 'region']] 82 | reports = reports[(reports['date'] >= startdate) & (reports['date'] <= enddate)] 83 | logging.info("[PAX] Database Updated") 84 | print('Pax - Database Updated') 85 | return reports 86 | 87 | 88 | 89 | def extractDatabasePFTsNewApp(): 90 | conn = databaseinit() 91 | curNewUsers = conn.cursor() 92 | curNewUsers.execute(QUERY_PAX_NEWAPP) 93 | newusers = pandas.DataFrame(curNewUsers.fetchall()) 94 | return newusers 95 | 96 | -------------------------------------------------------------------------------- /pax/facebook.py: -------------------------------------------------------------------------------- 1 | from utils.apicalls import FacebookAds 2 | import utils.time 3 | import logging 4 | # Data organization and return full Facebook Data 5 | 6 | 7 | def facebookdata(ad_accounts): 8 | facebook = FacebookAds() 9 | facebook = facebook.reports(date_start=utils.time.startdate, date_end=utils.time.enddate, ad_accounts=ad_accounts, extracted=list()) 10 | facebook['tool'] = 'FaceAds' 11 | facebook['source'] = 'Facebook Ads' 12 | facebook['os_name'] = 'android' 13 | facebook['midia'] = 'paid' 14 | facebook['os_name'][facebook['campaign_name'].str.contains('iOS')] = 'ios' 15 | facebook['os_name'][facebook['campaign_name'].str.contains('IOS')] = 'ios' 16 | 17 | facebook.rename(columns={'date_start': 'date', 'campaign_name': 'campaign', 'adset_name': 'adgroup', 18 | 'ad_name': 'creative', 'spend': 'amount_spent'}, inplace=True) 19 | facebook = facebook[ 20 | ['date', 'tool', 'midia', 'source', 'os_name', 'campaign', 'adgroup', 'creative', 'amount_spent', 'impressions', 21 | 'clicks']] 22 | 23 | logging.info("[PAX] Facebook Updated") 24 | print('Pax - Facebook Updated') 25 | return facebook 26 | -------------------------------------------------------------------------------- /pax/google.py: -------------------------------------------------------------------------------- 1 | from utils.apicalls import GoogleAds 2 | from utils.paths import PATH_DEFINE_VALUES 3 | import utils.time 4 | import pandas 5 | import logging 6 | 7 | def 
googlereports(customer_ids): 8 | reports=pandas.DataFrame() 9 | for customer_id in customer_ids: 10 | data = GoogleAds() 11 | data = data.reports(dateStart=utils.time.startdatebase, dateEnd=utils.time.enddatebase, customerId=customer_id) 12 | 13 | data.drop(['Interactions'], inplace=True, axis=1) 14 | #Column creation 15 | data['midia'] = 'paid' 16 | data['source'] = 'Google' 17 | data['os_name'] = 'android' 18 | 19 | #Convert values based on data on created tables 20 | define_values = pandas.read_csv(PATH_DEFINE_VALUES, sep=';') 21 | define_values = define_values[(define_values['app'] == 'pax') & (~define_values['source'].isna()) & (~define_values['campaign contains'].isna())] 22 | 23 | for rows in [tuple(x) for x in define_values.values]: 24 | data['source'][data['Campaign'].str.contains(rows[3], na=False)] = rows[1] 25 | 26 | 27 | data['os_name'][data['Campaign'].str.contains('iOS')] = 'ios' 28 | data.rename( 29 | columns={'Day': 'date', 30 | 'Campaign': 'campaign', 31 | 'Cost': 'amount_spent', 32 | 'Impressions': 'impressions', 33 | 'Clicks': 'clicks' 34 | }, inplace=True) 35 | data['tool'] = 'Adwords' 36 | reports = pandas.concat([reports, data]) 37 | 38 | reports = reports[['date', 'tool', 'midia', 'source', 'os_name', 39 | 'campaign', 'amount_spent', 'impressions', 'clicks']] 40 | logging.info("[PAX] Google Updated") 41 | print('Pax - Google Updated') 42 | return reports 43 | -------------------------------------------------------------------------------- /pax/performance.py: -------------------------------------------------------------------------------- 1 | from pax.appsflyer import appsflyerData 2 | from pax.facebook import facebookdata 3 | from pax.google import googlereports 4 | from pax.database import baseDatabase 5 | from utils.paths import PATH_SIGLAS_PRACAS 6 | from pax.twitter import twitterdata 7 | import unidecode 8 | import pandas 9 | import utils.time 10 | pandas.options.mode.chained_assignment = None 11 | 12 | 13 | def cleanNewAppData(performance): 14 | 15 | performance['os_name'][performance['campaign'].str.contains('iOS', na=False)] = 'ios' 16 | performance['os_name'][performance['campaign'].str.contains('IOS', na=False)] = 'ios' 17 | 18 | performance['campaign'][performance['campaign'] == 'GDN_And_MKT_BH-CPI924382686'] = 'GDN_And_MKT_BH-CPI' 19 | performance['campaign'][performance['campaign'] == 'GDN_And_MKT_VIX-CPC931642203'] = 'GDN_And_MKT_VIX-CPC' 20 | performance['campaign'][performance['campaign'] == 'GDN_And_MKT_VIX-CPI930974807'] = 'GDN_And_MKT_VIX-CPI' 21 | performance['campaign'][performance['campaign'] == 'SMS_All_Ops_CWB/market://details?id=com.app99.pax'] = 'SMS_All_Ops_CWB' 22 | performance['campaign'][performance['campaign'] == 'Spotify_All_Brand_BH-overlay'] = 'Spotify_All_Brand_BH' 23 | performance['campaign'][performance['campaign'] == 'SRC_And_Conc-GYN_UA929042102'] = 'SRC_And_Conc-GYN_UA' 24 | 25 | performance['campaign'][performance['campaign'] == 'RG-CWB-IOS-AppInstal'] = 'RG-CWB-IOS-AppInstall' 26 | 27 | if 'region' in performance.columns: 28 | region = pandas.read_csv(PATH_SIGLAS_PRACAS, sep=';') 29 | region['pracas'] = region['pracas'].str.upper() 30 | listofregions = [tuple(x) for x in region.values] 31 | 32 | performance['region'][performance['region'].isnull()] = 'BR' 33 | for i in range(0, len(listofregions)): 34 | performance['region'][performance['campaign'].str.contains(listofregions[i][0], na=False)] = \ 35 | listofregions[i][1] 36 | 37 | performance['region'] = performance['region'].apply(lambda x: unidecode.unidecode(x)) 38 | 39 
| performance['week'] = performance['date'].dt.week 40 | 41 | return performance 42 | 43 | 44 | def performanceNewAppData(): 45 | performance = pandas.concat([ 46 | appsflyerData(), 47 | twitterdata(), 48 | facebookdata(['act_1894184000615284', 'act_1691552937545059', 'act_967083766658650']), 49 | googlereports(['771-742-8350', '411-922-6657']), 50 | baseDatabase() 51 | ]) 52 | performance = cleanNewAppData(performance) 53 | performance = performance[ 54 | ['date', 'week', 'tool', 'midia', 'source', 'os_name', 'campaign', 'adgroup', 'creative', 55 | 'installs', 'first_trip', 'sign_ups', 'amount_spent', 'impressions', 'clicks', 'pax', 'trips', 'burn', 'gmv', 56 | 'takerate', 'pft_pax', 57 | 'pft_trips', 'pft_burn', 'pft_gmv', 'pft_takerate', 'region']] 58 | return performance 59 | 60 | 61 | -------------------------------------------------------------------------------- /pax/twitter.py: -------------------------------------------------------------------------------- 1 | from utils.time import startdate 2 | from utils.apicalls import TwitterAds 3 | import utils.time 4 | import logging 5 | 6 | 7 | def twitterdata(): 8 | twitter = TwitterAds() 9 | twitter = twitter.reports(dateStart=utils.time.startdate, dateEnd=utils.time.enddate) 10 | twitter.drop(['id'], inplace=True, axis=1) 11 | twitter['tool'] = 'TwitterAds' 12 | twitter['source'] = 'Twitter' 13 | twitter['os_name'] = 'android' 14 | twitter['midia'] = 'paid' 15 | twitter['os_name'][twitter['name'].str.contains('iOS')] = 'ios' 16 | twitter['os_name'][twitter['name'].str.contains('IOS')] = 'ios' 17 | 18 | twitter.rename(columns={'billed_charge_local_micro': 'amount_spent', 'name': 'campaign'}, inplace=True) 19 | 20 | twitter = twitter[ 21 | ['date', 'tool', 'midia', 'source', 'os_name', 'campaign', 'amount_spent', 'impressions', 22 | 'clicks']] 23 | 24 | logging.info("[PAX] Twitter Updated") 25 | print('Pax - Twitter Updated') 26 | return twitter 27 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | attrs==18.1.0 3 | cached-property==1.4.3 4 | cachetools==2.1.0 5 | certifi==2018.4.16 6 | chardet==3.0.4 7 | defusedxml==0.5.0 8 | facebook-business==3.0.0 9 | facebookads==2.11.4 10 | google-api-python-client==1.7.3 11 | google-auth==1.5.0 12 | google-auth-httplib2==0.0.3 13 | google-auth-oauthlib==0.2.0 14 | googleads==12.1.0 15 | httplib2==0.11.3 16 | idna==2.7 17 | isodate==0.6.0 18 | lxml==4.2.1 19 | numpy==1.14.5 20 | oauth2client==4.1.2 21 | oauthlib==2.1.0 22 | pandas==0.23.1 23 | psycopg2==2.7.5 24 | pyasn1==0.4.3 25 | pyasn1-modules==0.2.1 26 | PyDrive==1.3.1 27 | python-dateutil==2.7.3 28 | pytz==2018.4 29 | PyYAML==3.12 30 | requests==2.19.1 31 | requests-oauthlib==1.0.0 32 | requests-toolbelt==0.8.0 33 | rsa==3.4.2 34 | six==1.11.0 35 | suds-jurko==0.6 36 | Unidecode==1.0.22 37 | uritemplate==3.0.0 38 | urllib3==1.23 39 | xmltodict==0.11.0 40 | zeep==3.0.0 41 | -------------------------------------------------------------------------------- /staticfiles/reports/performancereportdrivers.csv: -------------------------------------------------------------------------------- 1 | date,week,tool,midia,source,os_name,campaign,adgroup,creative,installs,criou_basic,enviou_todos_docs,amount_spent,impressions,clicks,signups,signups_with_migration,regulars,regulars_with_migration,dft,dft_with_migration,region -------------------------------------------------------------------------------- 
/staticfiles/reports/performancereportnewapp.csv: -------------------------------------------------------------------------------- 1 | date,week,tool,midia,source,os_name,campaign,adgroup,creative,installs,first_trip,sign_ups,amount_spent,impressions,clicks,pax,trips,burn,gmv,takerate,pft_pax,pft_trips,pft_burn,pft_gmv,pft_takerate,region -------------------------------------------------------------------------------- /staticfiles/tables/custos_appsflyer_newapp.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/staticfiles/tables/custos_appsflyer_newapp.xlsx -------------------------------------------------------------------------------- /staticfiles/tables/custos_gmaps.csv: -------------------------------------------------------------------------------- 1 | source;typeg;cost_click;cost_phone_activated -------------------------------------------------------------------------------- /staticfiles/tables/media_and_source_by_source_and_campaign_names.csv: -------------------------------------------------------------------------------- 1 | midia;source;source contains;campaign contains;app 2 | ;Exemple;;Exemp;pax 3 | pago;;Exemp;GDN;pax -------------------------------------------------------------------------------- /staticfiles/tables/siglas_pracas.csv: -------------------------------------------------------------------------------- 1 | sigla;pracas 2 | SJC;Sao Jose dos Campos -------------------------------------------------------------------------------- /staticfiles/tables/source_and_campaigns_by_driverchanelid.csv: -------------------------------------------------------------------------------- 1 | driver_channel;source_name;campaign_name 2 | 100000;Example;Example -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicolasmelo1/marketing-performance-report/796534beeb729a38a142ae4a099a378c7eae6f99/utils/__init__.py -------------------------------------------------------------------------------- /utils/apicalls.py: -------------------------------------------------------------------------------- 1 | from facebook_business.adobjects import adsinsights 2 | from facebook_business.adobjects import adaccount 3 | from init.init import adwordsinit 4 | from init.init import twitterinit 5 | from init.init import facebookinit 6 | from init.init import appsflyerinit 7 | from pandas.io.json import json_normalize 8 | import requests 9 | import pandas 10 | import datetime 11 | import io 12 | import time 13 | import random 14 | import facebookads.exceptions 15 | 16 | ######################################### 17 | #### #### 18 | #### This is for extracting data #### 19 | #### #### 20 | ######################################### 21 | 22 | 23 | # class to make facebook api calls 24 | class FacebookAds: 25 | def __init__(self): 26 | facebookinit() 27 | 28 | def reports(self, date_start, date_end, ad_accounts, extracted, reports=pandas.DataFrame()): 29 | ad_account = random.choice(ad_accounts) 30 | while ad_account in extracted: 31 | ad_account = random.choice(ad_accounts) 32 | 33 | print("[Facebook] - Extracting data for ad_account=%s" % (ad_account)) 34 | account = adaccount.AdAccount(ad_account) 35 | 36 | insights = account.get_insights(fields=[ 37 | adsinsights.AdsInsights.Field.date_start, 38 | 
adsinsights.AdsInsights.Field.campaign_name,
39 |             adsinsights.AdsInsights.Field.adset_name,
40 |             adsinsights.AdsInsights.Field.ad_name,
41 |             adsinsights.AdsInsights.Field.impressions,
42 |             adsinsights.AdsInsights.Field.clicks,
43 |             adsinsights.AdsInsights.Field.spend,
44 |         ], params={
45 |             'level': adsinsights.AdsInsights.Level.ad,
46 |             'time_increment': '1',
47 |             'time_range': {
48 |                 'since': date_start,
49 |                 'until': date_end
50 |             },
51 |         }, async=True)  # NOTE: 'async' became a reserved word in Python 3.7; newer facebook_business releases renamed this parameter
52 | 
53 |         time.sleep(20)
54 | 
55 |         results = []
56 |         async_job = insights.remote_read()
57 |         while async_job['async_status'] != 'Job Completed':
58 |             print('[Facebook] - Percent completed from async run=' + str(async_job['async_percent_completion']))
59 |             time.sleep(20)
60 |             async_job = insights.remote_read()
61 |         if async_job['async_status'] == 'Job Completed':
62 |             print('[Facebook] - Percent completed from async run=' + str(async_job['async_percent_completion']))
63 |             time.sleep(20)
64 |             results = [x for x in insights.get_result()]
65 | 
66 |         if results:
67 |             facebook = pandas.DataFrame(results,
68 |                                         columns=['ad_name', 'adset_name', 'campaign_name', 'clicks', 'date_start',
69 |                                                  'date_stop', 'impressions', 'spend'])
70 |             facebook['spend'] = facebook['spend'].apply(lambda x: str(x.replace('.', ',')))
71 |             facebook = facebook[
72 |                 ['date_start', 'date_stop', 'campaign_name', 'adset_name', 'ad_name', 'impressions', 'clicks', 'spend']]
73 |             facebook.drop(['date_stop'], inplace=True, axis=1)
74 |             facebook['date_start'] = pandas.to_datetime(pandas.Series(facebook['date_start']), format="%Y-%m-%d")
75 |             facebook = facebook.sort_values(by='date_start')
76 |             reports = pandas.concat([reports, facebook])
77 | 
78 |         else:
79 |             facebook = pandas.DataFrame(results,
80 |                                         columns=['ad_name', 'adset_name', 'campaign_name', 'clicks', 'date_start',
81 |                                                  'date_stop', 'impressions', 'spend'])
82 |             reports = pandas.concat([reports, facebook])
83 | 
84 |         extracted.append(ad_account)
85 |         if sorted(extracted) != sorted(ad_accounts):
86 |             return self.reports(date_start, date_end, ad_accounts, extracted, reports)
87 |         else:
88 |             return reports
89 | 
90 | # class to make google api calls
91 | class GoogleAds:
92 | 
93 |     def reports(self, dateStart, dateEnd, customerId=None):
94 |         print("[Google] - Extracting reports data from customer_id=%s" % (customerId))
95 |         # initialize adwords
96 |         adwords = adwordsinit(customerId)
97 | 
98 |         # watch the version: right now it is 'v201802', and it needs to be updated constantly as old AdWords API versions are sunset.
99 |         report_downloader = adwords.GetReportDownloader(version='v201802')
100 | 
101 |         # it's just a query; you can check more about the parameters here:
102 |         # https://developers.google.com/adwords/api/docs/appendix/reports/campaign-performance-report?hl=pt-br#bidtype
103 |         report_query = ('SELECT Date, CampaignName, Impressions, Interactions, Clicks, Cost '
104 |                         'FROM CAMPAIGN_PERFORMANCE_REPORT '
105 |                         'DURING ' + dateStart + ', ' + dateEnd)
106 | 
107 |         stream_data = report_downloader.DownloadReportAsStreamWithAwql(report_query, 'CSV')
108 | 
109 |         # the API returns a raw CSV stream, not a dataframe, so convert it with pandas
110 | data = pandas.read_csv(stream_data, 111 | sep=str(','), 112 | encoding='utf-8-sig', 113 | header=1, 114 | error_bad_lines=False, 115 | warn_bad_lines=False) 116 | 117 | # Cost comes in micros: the real value is the number divided by 1,000,000 118 | data['Cost'] = data['Cost'].apply(lambda x: round(x/1000000)) 119 | 120 | # Drop the last line (the row with Google's total values) 121 | data = data[:-1] 122 | 123 | # Convert to datetime and sort it (the 'Date' field is labelled 'Day' in the downloaded CSV) 124 | data['Day'] = pandas.to_datetime(pandas.Series(data['Day']), format="%Y-%m-%d") 125 | data = data.sort_values(by='Day') 126 | return data 127 | 128 | 129 | # Google API call to extract campaign names with campaign ids 130 | def reportcampaigns(self, customerId=None): 131 | print("[Google] - Extracting campaigns from customer_id=%s" % (customerId)) 132 | adwords = adwordsinit(customerId) 133 | report_downloader = adwords.GetReportDownloader(version='v201802') 134 | report_query = ('SELECT CampaignName, CampaignId ' 135 | 'FROM CAMPAIGN_PERFORMANCE_REPORT') 136 | stream_data = report_downloader.DownloadReportAsStreamWithAwql(report_query, 'CSV') 137 | 138 | data = pandas.read_csv(stream_data, 139 | sep=str(','), 140 | encoding='utf-8-sig', 141 | header=1, 142 | error_bad_lines=False, 143 | warn_bad_lines=False) 144 | data = data[:-1] 145 | return data 146 | 147 | 148 | # class to make Twitter API calls 149 | class TwitterAds: 150 | 151 | # to get Twitter campaigns through the Twitter API you'll have to get your hands "dirty": 152 | # unlike the Google or Facebook APIs, the Twitter Ads API is quite new and doesn't come with a lot of tools out of the box, 153 | # so you need to build them yourself 154 | def reportcampaigns(self, dateStart): 155 | # this gets all the campaigns and campaign ids that we created with the account 156 | getcampaigns = 'https://ads-api.twitter.com/2/accounts/18ce54np2w4/campaigns' 157 | content = requests.get(getcampaigns, auth=twitterinit()).json() 158 | campaigns = json_normalize(content, ['data']) 159 | 160 | campaigns.drop(['updated_at', 'total_budget_amount_local_micro', 'start_time', 'standard_delivery', 'servable', 161 | 'funding_instrument_id', 'frequency_cap', 'entity_status', 'duration_in_days', 'deleted', 162 | 'daily_budget_amount_local_micro', 'currency', 'created_at', 'account_id'], 163 | inplace=True, 164 | axis=1) 165 | campaigns['end_time'] = campaigns['end_time'].apply(lambda x: str(x).split('T')[0] if x is not None else None) 166 | campaigns['end_time'] = pandas.to_datetime(pandas.Series(campaigns['end_time']), format="%Y-%m-%d") 167 | 168 | campaigns = campaigns[(campaigns['end_time'] >= dateStart) | (campaigns['reasons_not_servable'] != 'EXPIRED')] 169 | campaigns.drop(['end_time', 'reasons_not_servable'], inplace=True, axis=1) 170 | return campaigns 171 | 172 | 173 | def recursiveextractor(self, datestart, dateend, campaignslist, placement, twitterdataframe=pandas.DataFrame()): 174 | 175 | # this is the coolest part of the program: by default Twitter doesn't give back 176 | # data divided by date, so I need to do it myself. 177 | # I request data from Twitter in 24h ranges and loop through this func until datestart is equal to dateend 178 | 
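# Illustrative walk-through (assumed dates): with datestart='2018-05-01' the code below
# computes lastdate='2018-05-02', requests the 24h window between them, tags the rows
# with datestart, then recurses with datestart='2018-05-02' until it reaches dateend.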
179 | lastdate = datetime.datetime.strptime(datestart, "%Y-%m-%d") + datetime.timedelta(days=1) 180 | lastdate = str(lastdate).partition(" ")[0] 181 | # make the call 182 | content = requests.get('https://ads-api.twitter.com/2/stats/accounts/18ce54np2w4/', auth=twitterinit(), 183 | params={ 184 | 'start_time': datestart+'T00:00:00-0300', 185 | 'end_time': str(lastdate)+'T00:00:00-0300', 186 | 'entity': 'CAMPAIGN', 187 | 'granularity': 'TOTAL', 188 | 'metric_groups': 'ENGAGEMENT,BILLING', 189 | 'placement': placement, 190 | 'entity_ids': campaignslist 191 | }).json() 192 | 193 | # some JSON partitioning and normalization to convert it to a dataframe 194 | campaignid = json_normalize(content, ['data']) 195 | campaignid.drop(['id_data'], inplace=True, axis=1) 196 | twitterreport = json_normalize(content['data'], 'id_data') 197 | twitterreport = pandas.concat( 198 | [twitterreport.drop('metrics', axis=1), pandas.DataFrame(twitterreport['metrics'].tolist())], axis=1) 199 | 200 | # drop what we don't need 201 | twitterreport.drop(['card_engagements', 'carousel_swipes', 'engagements', 'follows', 'likes', 'poll_card_vote', 202 | 'qualified_impressions', 'billed_engagements', 203 | 'replies', 'app_clicks', 'segment', 'tweets_send', 'url_clicks', 'retweets'], inplace=True, axis=1) 204 | 205 | # as the date is something i've added myself, to get this data divided by date i need to put it on the DF myself 206 | twitterreport['date'] = datestart 207 | 208 | 209 | # some ETL on the metrics fields (the values come as lists, with spend in micros) 210 | twitterreport['billed_charge_local_micro'] = twitterreport['billed_charge_local_micro'].apply( 211 | lambda x: 0 if x is None else int(round(sum(x) / 1000000))) 212 | twitterreport['impressions'] = twitterreport['impressions'].apply(lambda x: 0 if x is None else sum(x)) 213 | twitterreport['clicks'] = twitterreport['clicks'].apply(lambda x: 0 if x is None else sum(x)) 214 | 215 | twitterreport = pandas.concat([campaignid, twitterreport], axis=1) 216 | 217 | 218 | # twitterdataframe accumulates the dataframes from each day 219 | twitterdataframe = pandas.concat([twitterdataframe, twitterreport], ignore_index=True) 220 | 221 | if dateend == datestart: 222 | return twitterdataframe 223 | else: 224 | return self.recursiveextractor(str(lastdate), dateend, campaignslist, placement, twitterdataframe) 225 | 226 | 227 | def reports(self, dateStart, dateEnd): 228 | print("[Twitter] - Extracting twitter data") 229 | # this is what gets called on a TwitterAds object to retrieve the data; 230 | # you only need to pass the start date and end date parameters. 231 | # first you create an empty dataframe that'll hold the data from the for loop 232 | twitter = pandas.DataFrame() 233 | 234 | # reportcampaigns returns a dataframe with the following columns: campaign id and campaign name. 235 | # in order to extract the performance data, you have to pass the ids of the campaigns you want values for, 236 | # so we extract all of our campaigns in this account 237 | twitterCampaigns = self.reportcampaigns(dateStart) 238 | 239 | # excededNumberOfCampaigns says that the number of campaigns we're trying to retrieve data for is bigger than 20. 240 | # if we have more than 20 campaigns running we need to split them into batches, as sketched below. 
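# A plainer sketch of the same 20-per-call batching (illustrative only, not the code path used below):
#   for i in range(0, len(twitterCampaignsList), 20):
#       batch = ','.join(twitterCampaignsList[i:i + 20])
#       # ...call recursiveextractor once per batch and concat the results...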
241 | excededNumberOfCampaigns = None 242 | 243 | # this converts the campaign ids to a list 244 | twitterCampaignsList = twitterCampaigns['id'].tolist() 245 | 246 | 247 | # the placement says where you want the campaigns from: 248 | # "we have campaigns that can be distributed on our platform or outside it, with third party publishers". 249 | # as we want both, we create a list containing both. 250 | # we can't make the twitter call passing 'ALL_ON_TWITTER' and 'PUBLISHER_NETWORK' at the same time, 251 | # so we will need to make two calls 252 | placements = ['ALL_ON_TWITTER', 'PUBLISHER_NETWORK'] 253 | 254 | 255 | # where the magic happens: first we loop through the placements 256 | for placement in placements: 257 | twitterReportsDataFrame = pandas.DataFrame() 258 | 259 | # as said above, if the list is bigger than 20 it needs to be split, and that's where the campaignsList variable comes in 260 | if len(twitterCampaignsList) > 20: 261 | excededNumberOfCampaigns = True 262 | campaignsList = twitterCampaignsList[:20] 263 | else: 264 | campaignsList = twitterCampaignsList 265 | 266 | # list to string, so the call can be made 267 | campaigns = ','.join(campaignsList) 268 | 269 | # as said above, if excededNumberOfCampaigns is True the list needs to be split. 270 | # the idea is simple: while twitterCampaignsList exists and isn't an empty list, 271 | # you only want the values that aren't in campaignsList yet, which is why the variable is overwritten with new values; 272 | # then, if twitterCampaignsList is still bigger than 20, you split it again, consecutively. 273 | # the aux variable is needed so twitterCampaignsList isn't overwritten and the next placement can start over. 274 | # note that you leave the while loop when excededNumberOfCampaigns turns False, i.e. when campaignsList has 275 | # 20 items or fewer; the final call happens outside the if clause, on the next step 276 | if excededNumberOfCampaigns is True: 277 | auxTwitterCampaignsList = twitterCampaignsList 278 | while excededNumberOfCampaigns is True: 279 | twitterReportsDataFrame = pandas.concat([twitterReportsDataFrame, self.recursiveextractor(dateStart, 280 | dateEnd, 281 | campaigns, 282 | placement)]) 283 | auxTwitterCampaignsList = [x for x in auxTwitterCampaignsList if x not in campaignsList] 284 | if len(auxTwitterCampaignsList) > 20: 285 | excededNumberOfCampaigns = True 286 | campaignsList = auxTwitterCampaignsList[:20] 287 | else: 288 | excededNumberOfCampaigns = False 289 | campaignsList = auxTwitterCampaignsList 290 | campaigns = ','.join(campaignsList) 291 | 292 | # with 20 campaigns or fewer the if clause is skipped and this call just fetches the data directly 293 | twitterReportsDataFrame = pandas.concat([twitterReportsDataFrame, self.recursiveextractor(dateStart, 294 | dateEnd, campaigns, placement)]) 295 | 296 | # remember our dataframe containing ids and campaign names? it's used here to get the names of the campaigns. 
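# The inner merge below keeps only rows whose 'id' shows up in both frames, which
# attaches the campaign name to every stats row and drops stats with no matching campaign.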
297 | twitterreport = pandas.merge(twitterReportsDataFrame, 298 | twitterCampaigns, how='inner', left_on='id', right_on='id') 299 | 300 | 301 | twitter = pandas.concat([twitter, twitterreport]) 302 | twitter['date'] = pandas.to_datetime(pandas.Series(twitter['date']), format="%Y-%m-%d") 303 | twitter = twitter.sort_values(by='date') 304 | return twitter 305 | 306 | 307 | # class to make Adjust API calls 308 | class Adjust: 309 | 310 | # this is to group the data, but as stated above the data already comes grouped 311 | ''' 312 | groupingCategorizer = [ 313 | ('date', ['day', 'hour', 'week', 'month']), 314 | ('tracker_name', ['trackers']), 315 | ('network', ['networks']), 316 | ('campaign', ['campaigns']), 317 | ('adgroup', ['adgroups']), 318 | ('creative', ['creatives']), 319 | ('country', ['countries']), 320 | ('device_type', ['device_types']), 321 | ('region', ['region']), 322 | ('os_name', ['os_names']) 323 | ] 324 | ''' 325 | def reports(self, dateStart, dateEnd, appid, grouping, eventKpis, kpis=None, trackerFilter=None): 326 | eventKpisList = [] 327 | 328 | 329 | for data in eventKpis: 330 | eventKpisList.append(data) 331 | 332 | 333 | kpis = ['installs'] if kpis is None else kpis 334 | trackerFilter = [''] if trackerFilter is None else trackerFilter 335 | 336 | trackerFilter = ','.join(trackerFilter) 337 | kpis = ','.join(kpis) 338 | eventKpisList = ','.join(eventKpisList) 339 | groupingList = ','.join(grouping) 340 | content = requests.get('https://api.adjust.com/kpis/v1/' + appid + '.csv', 341 | params={ 342 | 'start_date': dateStart, 343 | 'end_date': dateEnd, 344 | 'kpis': kpis, 345 | 'event_kpis': eventKpisList, 346 | 'user_token': 'user_token', 347 | 'grouping': groupingList, 348 | 'tracker_filter': trackerFilter 349 | }).content 350 | data = pandas.read_csv(io.StringIO(content.decode('utf-8'))) 351 | data = data.rename(columns=eventKpis) 352 | data.drop(['tracker_token'], inplace=True, axis=1) 353 | data['date'] = pandas.to_datetime(pandas.Series(data['date']), format="%Y-%m-%d") 354 | data = data.sort_values(by='date') 355 | 356 | return data 357 | 358 | 359 | # class to make AppsFlyer API calls 360 | class AppsFlyer: 361 | def reports(self, dateStart, dateEnd, appNames, callTypes, fields=None, eventName=None, countApp=0, countCall=0, data=pandas.DataFrame(), saveEndDate=None): 362 | 363 | print("[Appsflyer] - Extracting appsflyer data for app_name=%s and call_type=%s" % 364 | (appNames[countApp], callTypes[countCall])) 365 | 366 | # this initializes every variable passed 367 | eventNameList = ','.join(eventName) if eventName is not None else '' 368 | fieldsList = ','.join(fields) if fields is not None else '' 369 | field = 'fields' if fields else '' 370 | events = 'event_name' if 'in_app' in callTypes[countCall] else '' 371 | dataFromCall = pandas.DataFrame() 372 | # the call gets made and stored in contentappsflyer 373 | try: 374 | contentappsflyer = requests.get('https://hq.appsflyer.com/export/' + appNames[countApp] + '/' + callTypes[countCall] + '/v5', 375 | params={ 376 | 'api_token': appsflyerinit(), 377 | 'timezone': '-03:00', 378 | 'from': dateStart, 379 | 'to': dateEnd, 380 | field: fieldsList, 381 | events: eventNameList 382 | }).content 383 | # transform the received data into a dataframe 384 | dataFromCall = pandas.read_csv(io.StringIO(contentappsflyer.decode('utf-8'))) 385 | if callTypes[countCall] in ['installs_report', 'in_app_events_report', 'uninstall_events_report', 'organic_installs_report', 'organic_in_app_events_report']: 386 | dataFromCall['Media Source'] = dataFromCall['Media Source'].fillna('no_channel')
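# NOTE (hedged): AppsFlyer's raw-data Pull API caps how many rows one export returns
# (this code assumes roughly 200,000 per call), which is why reports() recurses below
# with a walked-back dateEnd whenever a response comes back at the cap.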
387 | # some classification processes: 388 | # first, organic raw data gets flagged with fixed values; 389 | # second, we tag which operating system the app id belongs to 390 | if 'organic' in callTypes[countCall]: 391 | dataFromCall['Media Source'] = 'Organic' 392 | dataFromCall['Campaign'] = '0' 393 | dataFromCall['Adset'] = '0' 394 | if 'com.' in appNames[countApp]: 395 | dataFromCall['os_name'] = 'android' 396 | else: 397 | dataFromCall['os_name'] = 'ios' 398 | except Exception: 399 | return self.reports(dateStart=dateStart, dateEnd=dateEnd, appNames=appNames, callTypes=callTypes, fields=fields, 400 | eventName=eventName, countApp=countApp, 401 | countCall=countCall, 402 | data=data, saveEndDate=saveEndDate) 403 | # The func has two counters in it so it can iterate over every app name and call type. 404 | # Reaching here means the data retrieved is complete, so we go on with the loops; there are 2 loops in this func, first run one then the other 405 | if len(dataFromCall.index) < 200000: 406 | # all app names for this call type are done 407 | if len(appNames) == countApp+1: 408 | # all call types are done 409 | if len(callTypes) == countCall+1: 410 | 411 | data = pandas.concat([data, dataFromCall]) 412 | # now we return the data and get out of the func 413 | return data 414 | else: 415 | # If saveEndDate exists it replaces the dateEnd parameter and is set back to None so the run can go on normally 416 | if saveEndDate: 417 | dateEnd = saveEndDate 418 | saveEndDate = None 419 | else: 420 | pass 421 | # The call counter drives the second loop: on every new iteration the appNames counter restarts and the calls counter is incremented by 1 422 | data = pandas.concat([data, dataFromCall]) 423 | return self.reports(dateStart, dateEnd, appNames, callTypes, fields, eventName, countApp=0, 424 | countCall=countCall+1, 425 | data=data, saveEndDate=saveEndDate) 426 | else: 427 | # If saveEndDate exists it replaces the dateEnd parameter and is set back to None so the run can go on normally 428 | if saveEndDate: 429 | dateEnd = saveEndDate 430 | saveEndDate = None 431 | else: 432 | pass 433 | data = pandas.concat([data, dataFromCall]) 434 | # The app counter drives the first loop: on every new iteration the calls counter stays the same and the app counter is incremented by 1 435 | return self.reports(dateStart, dateEnd, appNames, callTypes, fields=fields, eventName=eventName, countApp=countApp+1, countCall=countCall, data=data, saveEndDate=saveEndDate) 436 | else: 437 | # Where the magic happens, and why this is a recursive func: 438 | # if the dataframe hit the 200,000-row cap it runs again without changing the counters. 439 | # Also, the dateEnd parameter gets updated with the time of the last row of the dataframe 440 | saveEndDate = dateEnd if saveEndDate is None else saveEndDate 441 | dateEnd = dataFromCall['Event Time' if fields and 'event_time' in fields else 'Install Time'].iloc[-1][:-3] 442 | 443 | # The time comes as yyyy-mm-dd hh:mm:ss, so ':ss' is cut off the string to update the dateEnd parameter 444 | data = pandas.concat([data, dataFromCall]) 445 | return self.reports(dateStart=dateStart, dateEnd=dateEnd, appNames=appNames, callTypes=callTypes, fields=fields, eventName=eventName, countApp=countApp, 446 | countCall=countCall, 447 | data=data, saveEndDate=saveEndDate) 448 | 449 | -------------------------------------------------------------------------------- /utils/drive.py: -------------------------------------------------------------------------------- 1 | from utils.paths import all_paths_tabelas, all_paths_relatorios 2 | from init.init import googledriveinit 3 | from pydrive.drive import GoogleDrive
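# NOTE (hedged): per the README the tables are synced automatically when the program starts;
# the intended flow is load_files() before a run to refresh staticfiles/tables from Drive,
# and upload_files() afterwards to push the generated staticfiles/reports back up.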
4 | import pandas 5 | import os 6 | 7 | 8 | def load_files(): 9 | files_to_load = [f for f in os.listdir(all_paths_tabelas) if not f.startswith('.')] 10 | 11 | drive = GoogleDrive(googledriveinit()) 12 | 13 | file_list = drive.ListFile({'q': "'folder_id' in parents and trashed=false"}).GetList() 14 | for file1 in file_list: 15 | print('[Google Drive] - File to load: %s, id: %s' % (file1['title'], file1['id'])) 16 | if file1['title'] in files_to_load: 17 | file = drive.CreateFile({'id': file1['id']}) 18 | file.GetContentFile(all_paths_tabelas + file1['title']) 19 | 20 | 21 | def upload_files(): 22 | files_to_upload = [f for f in os.listdir(all_paths_relatorios) if not f.startswith('.')] 23 | 24 | drive = GoogleDrive(googledriveinit()) 25 | 26 | file_list = drive.ListFile({'q': "'folder_id' in parents and trashed=false"}).GetList() 27 | for file1 in file_list: 28 | print('[Google Drive] - File to upload: %s, id: %s' % (file1['title'], file1['id'])) 29 | if file1['title'] in files_to_upload: 30 | file = drive.CreateFile({'id': file1['id']}) 31 | file.SetContentFile(all_paths_relatorios + file1['title']) 32 | file['title'] = file1['title'] 33 | file.Upload() 34 | -------------------------------------------------------------------------------- /utils/paths.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # This step is for getting the paths to the folders. There are 2 aux folders: 4 | # one is called Tabelas (tables), which holds auxiliary tables; the other is Relatorios (reports), 5 | # where all the generated csvs are written 6 | 7 | 8 | # This is the main path; the others are static and don't change 9 | 10 | path = '/'.join(os.path.realpath(__file__).replace('\\', '/').split('/')[:-2]) 11 | 12 | ''' 13 | ############################################ 14 | ### !!!!! DON'T CHANGE !!!!! ### 15 | ############################################ 16 | ''' 17 | 
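# Example (illustrative paths): if this file lives at
# /home/user/performance_reports/utils/paths.py, then path resolves to
# '/home/user/performance_reports', all_paths_tabelas to
# '/home/user/performance_reports/staticfiles/tables/' and all_paths_relatorios
# to '/home/user/performance_reports/staticfiles/reports/'.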
18 | # These are the paths to write and read reports or tables 19 | all_paths_tabelas = path + r'/staticfiles/tables/' 20 | all_paths_relatorios = path + r'/staticfiles/reports/' 21 | 22 | ''' 23 | ############################################ 24 | ### READ ### 25 | ############################################ 26 | ''' 27 | 28 | # PAX OLD APP 29 | PATH_CUSTOS_ADJUST = all_paths_tabelas + 'custos_adjust.csv' 30 | PATH_CUSTOS_GMAPS = all_paths_tabelas + 'custos_gmaps.csv' 31 | 32 | # DRIVERS 33 | PATH_CUSTOS_ADJUST_DRIVERS = all_paths_tabelas + 'custos_adjust_drivers.csv' 34 | PATH_TO_DRIVER_CHANNEL_EQUIVALENCE = all_paths_tabelas + 'source_and_campaigns_by_driverchanelid.csv' 35 | 36 | # PAX AND DRIVERS 37 | PATH_CUSTOS_APPSFLYER = all_paths_tabelas + 'custos_appsflyer_newapp.xlsx' 38 | PATH_DEFINE_VALUES = all_paths_tabelas + 'media_and_source_by_source_and_campaign_names.csv' 39 | # PAX 40 | PATH_SIGLAS_PRACAS = all_paths_tabelas + 'siglas_pracas.csv' 41 | 42 | 43 | ''' 44 | ############################################ 45 | ### WRITE ### 46 | ############################################ 47 | ''' 48 | 49 | # AUTOMATICALLY 50 | PATH_TO_CUSTOS_ADJUST = all_paths_tabelas + 'custos_adjust.csv' 51 | PATH_TO_CUSTOS_ADJUST_DRIVERS = all_paths_tabelas + 'custos_adjust_drivers.csv' 52 | 53 | # MANUAL 54 | PATH_TO_RELATORIOS = all_paths_relatorios -------------------------------------------------------------------------------- /utils/queries.py: -------------------------------------------------------------------------------- 1 | import utils.time 2 | 3 | import datetime 4 | 5 | # Only Queries 6 | 7 | ''' 8 | ############################################ 9 | ### FUNCTIONS ### 10 | ############################################ 11 | ''' 12 | datestart = datetime.datetime.strptime(utils.time.startdate, "%Y-%m-%d") - datetime.timedelta(days=30) 13 | datestart = str(datestart).partition(" ")[0] 14 | 15 | # EXAMPLE 16 | ''' 17 | ############################################ 18 | ### DRIVERS ### 19 | ############################################ 20 | ''' 21 | # BASE OLD APP 22 | QUERY_DRIVER_FIRST_TRIP = """SELECT * 23 | WHERE date_value BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 24 | GROUP BY 1, 2, 3, 4, 5, 6 25 | ORDER BY 1, 2""" 26 | QUERY_DRIVER_NEW_REGULAR = """SELECT * 27 | WHERE date_value BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 28 | GROUP BY 1, 2, 3, 4, 5, 6 29 | ORDER BY 1, 2""" 30 | QUERY_DRIVER_SIGN_UP = """SELECT * 31 | WHERE date_value BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 32 | GROUP BY 1, 2, 3, 4, 5, 6 33 | ORDER BY 1, 2""" 34 | 35 | # COHORT OLD APP 36 | QUERY_DRIVER_COHORT = """ 37 | SELECT 38 | where to_char((install_date::DATE),'yyyy-MM-dd') BETWEEN '""" + datestart + """' AND '""" + utils.time.enddate + """' 39 | and activity_kind = 'install' 40 | group by 1,2,3 41 | order by 1,2,3""" 42 | 43 | # BASE NEW APP 44 | QUERY_DRIVER_DFT_GMV_NEWAPP = """ 45 | SELECT * 46 | WHERE (to_date((ddb.reg_time), 'YYYY-MM-DD HH24:MI:SS') - INTERVAL '11 hours') :: DATE BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 47 | GROUP BY 1, 2, 3, 4, 5 48 | ORDER BY 1 49 | """ 50 | 51 | QUERY_DRIVER_NEW_REGULAR_NEWAPP = """ 52 | SELECT * 53 | WHERE (to_date((ddb.work_time), 'YYYY-MM-DD HH24:MI:SS') - 54 | INTERVAL '11 hours') :: DATE BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 55 | GROUP BY 1, 2, 3, 4, 5 56 | ORDER BY 1 57 | """ 58 | 
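# NOTE (hedged): these query strings are built by plain concatenation because the
# BETWEEN bounds come from utils.time at import time, not from user input; with
# untrusted input you'd bind parameters through the DB driver instead, e.g.
#   cursor.execute("... WHERE date_value BETWEEN %s AND %s", (startdate, enddate))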
59 | QUERY_DRIVER_APPSFLYER_INSTALLS = """SELECT * 60 | WHERE app_id IN ('apps_drivers') and CONVERT_TIMEZONE('GMT', 'America/Sao_Paulo',install_time::timestamp)::DATE BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 61 | GROUP BY 1,2,3,4,5 62 | ORDER BY 1,2""" 63 | 64 | ''' 65 | ############################################ 66 | ### PAX ### 67 | ############################################ 68 | ''' 69 | 70 | # NEW APP 71 | QUERY_PAX_NEWAPP = """ 72 | SELECT *""" 73 | 74 | QUERY_PAX_APPSFLYER_INSTALLS = """SELECT * 75 | WHERE app_id IN ('apps_pax') and install_time::DATE BETWEEN '""" + utils.time.startdate + """' AND '""" + utils.time.enddate + """' 76 | GROUP BY 1,2,3,4,5,6 77 | ORDER BY 1,2""" 78 | 79 | 80 | TESTE_QUERY_PAX_MAU_REGIAO = """SELECT * 81 | WHERE r.call_date BETWEEN '""" + utils.time.startdate + """ 00:00:00' AND '""" + utils.time.enddate + """ 23:59:59' 82 | GROUP BY 1, 2, 3, 4, 5 83 | ORDER BY 1, 2""" 84 | -------------------------------------------------------------------------------- /utils/time.py: -------------------------------------------------------------------------------- 1 | import time 2 | import datetime 3 | 4 | # This is only to get the current time. We're only looking for reports inside the current month. 5 | # dateStart is always day 1 of the month we are currently in; the only exception is when today is day 1, in which case we close the previous month. 6 | # dateEnd is always today minus 1 day 7 | # the xxxxdate() helpers return dates as yyyy-MM-dd (used in adjust, facebook and some queries on our database) 8 | # the xxxxdatebase() helpers return dates as yyyyMMdd (used in google and some of our database queries) 9 | 10 | 11 | def datestart(currentDate = None): 12 | # if today is the first day of the month we get the first day of the previous month instead. 13 | 14 | if currentDate: 15 | if currentDate.day == 1: 16 | today = '01' 17 | month = currentDate.month - 1 18 | year = currentDate.year 19 | if month == 0: month, year = 12, year - 1  # on January 1st, roll back to December of last year 20 | if month < 10: 21 | return str(year) + '-' + '0' + str(month) + '-' + today 22 | else: 23 | return str(year) + '-' + str(month) + '-' + today 24 | else: 25 | today = '01' 26 | currdate = str(currentDate.year) + '-' + (str(currentDate.month) if currentDate.month > 9 else '0' + str(currentDate.month)) 27 | return currdate + '-' + today 28 | else: 29 | if time.strftime("%d") == '01': 30 | today = '01' 31 | month = int(time.strftime('%m')) - 1 32 | year = int(time.strftime('%Y')) 33 | if month == 0: month, year = 12, year - 1  # same January rollover as above 34 | if month < 10: 35 | return str(year) + '-' + '0' + str(month) + '-' + today 36 | else: 37 | return str(year) + '-' + str(month) + '-' + today 38 | else: 39 | today = '01' 40 | currdate = time.strftime("%Y-%m") 41 | return currdate + '-' + str(today) 42 | 43 | 44 | def dateend(): 45 | datetoend = str(datetime.datetime.now() - datetime.timedelta(days=1)).partition(" ")[0] 46 | return datetoend 47 | 48 | 49 | def datetobase(date): 50 | return date.replace('-', '') 51 | 52 | 53 | startdate = datestart() 54 | startdatebase = datetobase(startdate) 55 | enddate = dateend() 56 | enddatebase = datetobase(enddate) 57 | --------------------------------------------------------------------------------
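A quick sanity check of the date helpers in utils/time.py (illustrative sketch; the example dates, and running from the repo root, are assumptions):
```
import datetime
from utils import time as t

# Mid-month: the window opens on day 1 of the current month.
print(t.datestart(datetime.date(2018, 5, 15)))  # 2018-05-01
# On day 1 we close the previous month instead.
print(t.datestart(datetime.date(2018, 5, 1)))   # 2018-04-01
# January 1st rolls back to December of the previous year.
print(t.datestart(datetime.date(2019, 1, 1)))   # 2018-12-01
# datetobase() strips the dashes for the yyyyMMdd variants.
print(t.datetobase('2018-05-01'))               # 20180501
```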