Source code for nys_parole_scraper.nys_parole_scraper

# -*- coding: utf-8 -*-
"""
@author: khayes
"""

# import libraries
from selenium import webdriver
import time
import pandas as pd
from selenium.webdriver.common.by import By
import numpy as np
import os
from datetime import date
from datetime import datetime
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from nys_parole_scraper import scraper_functions as sf
# changed from just " import scraper_functions as sf "
### making this change, and moving all files from src>nys_parole_scraper to just src, and then importing scraper as "from  nys_parole_scraper.nys_parole_scraper import parole_scraper" (alternatively could do "from nys_scraper import nys_scraper" and then use nys_scraper.parole_scraper as function call). This change allows both this to work as well as pytest
from selenium.webdriver.firefox.options import Options

[docs]def parole_scraper(file_path, directory): """ Scrapes NYS parole information from the NYS DOCCS parolee lookup website based on user inputs and returns a clean dataset and summary statistics. Parameters ---------- file_path : String A string representing the path of the CSV file that includes the identifying information of the individuals to be searched for with the scraper. See Readme for more information of the input csv file construction. directory : String A string representing the folder path where the user would like an output folder created, in which the final dataset and summary statistics will be exported. Returns ------- To return the following python objects as DataFrames, assign the function to two variables (ex: df1, df2 = parole_scraper(file_path, directory)). To only export the dataframes to the provided directory, you do not need to assign the function to variables (ex: parole_scraper(file_path, directory)) full_output : pandas DataFrame, CSV The concatenated parole information of all provided individuals found in the DOCCS parole database. The returned dataframe will take on whatever name you assign first to the the function. Example: df1, df2 = parole_scraper(file_path, directory) full_output will be returned as df1 full_putput will also be exported as a csv to an output folder in the directory provided by the directory parameter. stats_list: List, Excel A list of the Following DataFrames, returned as the second varible assigned to the function call. All summary statistics and frequency table objects will be exported in separate sheets of an Excel file in the same output folder. stats_numeric : pandas DataFrame Summary statistics on age; length of time between release to parole and current date in months; and number of convictions race_freq : pandas DataFrame Frequency table of race/ethncity age_freq : pandas DataFrame Frequency table of age groups pstatus_freq : pandas DataFrame Frequency table of current parole status county_unique_freq : pandas DataFrame Frequency table of unique individuals with convictions in each county top_charge_freq : pandas DataFrame Frequency table of top charges convictions_freq : pandas DataFrame Frequency table of unique individuals per charge type (every conviction, not only top charge) Example -------- Returning DataFrames as objects: >>> from parole_scrpaer_MDS import parole_scraper >>> file_path = "C:/Users/parole_scraping.csv" >>> directory = "C:/Users/Output_Folder" >>> df1, df_list = parole_scraper(file_path, directory) df1 = full_output df_list = stats_list full_output exported as CSV stats_list exported as Excel Without returning DataFrames as objects: >>> from parole_scrpaer_MDS import parole_scraper >>> file_path = "C:/Users/parole_scraping.csv" >>> directory = "C:/Users/Output_Folder" >>> parole_scraper(file_path, directory) full_output exported as CSV stats_list exported as Excel """ if isinstance(file_path, int): raise ValueError('File_path argument must be a string') if isinstance(file_path, float): raise ValueError('File_path argument must be a string') if isinstance(directory, int): raise ValueError('directory argument must be a string') if isinstance(directory, float): raise ValueError('directory argument must be a string') assert os.path.isfile(file_path) != False, "file_path argument must be a path to an existing file. Please check your input." assert os.path.isdir(directory) != False, "directory argument must be the path to an existing directory. Please check your input." #create date and time variable and make directory name now = datetime.now() dyn_dir_name = now.strftime("%Y%m%d_%H.%M.%S") # create today variables for adding to dataset and computing date differences now_today = date.today() today_str = now_today.strftime("%m/%d/%Y") today = datetime.strptime(today_str, "%m/%d/%Y") #create directory global output_dir output_dir = directory+"/Output_"+dyn_dir_name if not os.path.exists(output_dir): os.makedirs(output_dir) #Import file with scraping info as csv data_csv = pd.read_csv(file_path, delimiter=',', dtype='str') #strip white spaces for col in ('NYSID', 'First Name', 'Last Name', 'DOB', 'Unique ID'): data_csv[col] = data_csv[col].astype(str).str.strip() #rename columns data_csv = data_csv.rename(columns={'First Name': 'first', 'Last Name': 'last', 'DOB': 'dob', 'Unique ID': 'id'}) # create year column data_csv['year'] = data_csv['dob'].str.extract(r'(\d\d\d\d)$') # create df of nysids and id only nysids_start = data_csv.drop(columns=['first', 'last', 'dob']) # keep only NYSIDS with 9 didgits followed by letter nysids = nysids_start[nysids_start['NYSID'].str.contains(r'^\d\d\d\d\d\d\d\d[A-Za-z]$')] # keep only NYSIDS that do not have "00000" - these are fake nysids given by state when NYSID is unknown nysids = nysids[nysids['NYSID'].str.contains(r'^(?!00000)')] # outer merge with data_csv outer_join = data_csv.merge(nysids, how = 'outer', indicator = True) # anti-join to get names that need to be scraped names = outer_join[~(outer_join._merge == 'both')].drop('_merge', axis = 1) # specify the url (DOCCS) urlpage = 'https://publicapps.doccs.ny.gov/ParoleeLookup/default' # run firefox webdriver from executable path global driver options = Options() options.headless = True driver = webdriver.Firefox(options=options) # get web page driver.get(urlpage) # sleep for 5s to allow page to fully load time.sleep(5) # create emtpy df and list to append individuals' scraped data to as dataframes global dataframe dataframe = pd.DataFrame() global df_list df_list = [] # global wait global wait wait = WebDriverWait(driver, 30) #========================================================================= #LOOP BEGINS HERE #========================================================================= #Wait until page is fully loaded and nysid field is available for x,e in zip(nysids['NYSID'], nysids['id']): wait.until(EC.visibility_of_element_located(( By.XPATH, "//*[@id='MainContent_txtNysid']"))) #find nysid entry by xpath nysid_search = driver.find_element( By.XPATH, "//*[@id='MainContent_txtNysid']") # find submit button by xpath_homepage submit = driver.find_element(By.XPATH, "//*[@id='MainContent_BtnSubmit']") # Enter nysid in first searchbar nysid_search.send_keys(x) # Click Submit submit.click() #Check if nysid was not found try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_paroleeInformation']") except NoSuchElementException: wait.until(EC.visibility_of_element_located(( By.XPATH, "//*[@id='MainContent_NewSearch']"))) newsearch = driver.find_element(By.XPATH, "//*[@id='MainContent_NewSearch']") newsearch.click() continue #========================================================================== ## SCRAPE DATA: #========================================================================== ##Table 1 - "ParoleeInformation" dataframe = sf.scrape_table1(driver, dataframe) ##Table 2 - "SupervisionInformation" #######Check that Table 2 exists try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_supervisionInformation']/tbody/tr[1]/td[1]") except NoSuchElementException: try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_offenseInformationTable']/tbody/tr[2]/td[1]") except NoSuchElementException: # if neither supervision nor offense tables exist #export_nysid_df(dataframe) df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue #if offense table exists: dataframe = sf.scrape_table3(driver, dataframe, e) #export to csv: df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue ##If Table 2 (supervision info) exists, scrape: dataframe = sf.scrape_table2(driver, dataframe) ##Table 3 - "OffenseInformation" #######Check that Table 3 exists try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_offenseInformationTable']/tbody/tr[2]/td[1]") except NoSuchElementException: #export df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue #If Table 3 exists, scrape: dataframe = sf.scrape_table3(driver, dataframe, e) #####Done Scraping df_list.append(dataframe) sf.new_search(driver, wait) #============================================================================== # LOOP THROUGH NAMES #============================================================================== for a,b,c,d,e in zip(names['first'], names['last'], names['dob'], names['year'], names['id']): wait.until(EC.visibility_of_element_located(( By.XPATH, "//*[@id='MainContent_txtFName']"))) #find first name entry by xpath first_search = driver.find_element( By.XPATH, "//*[@id='MainContent_txtFName']") #find last name entry by xpath last_search = driver.find_element( By.XPATH, "//*[@id='MainContent_txtLName']") #find last name entry by xpath year_search = driver.find_element(By.XPATH, "//*[@id='MainContent_txtYob']") # find submit button by xpath_homepage submit = driver.find_element(By.XPATH, "//*[@id='MainContent_BtnSubmit']") # Enter first name in first searchbar first_search.send_keys(a) # Enter last name in first searchbar last_search.send_keys(b) # Enter year in year searchbar year_search.send_keys(d) # Click Submit submit.click() #Check if name was not found try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_manyResultsDiv']") except NoSuchElementException: sf.new_search(driver, wait) continue try: # check if there is a DOB match on page 1 of results check = driver.find_element(By.XPATH, "//tr[td='" + c + "']") except NoSuchElementException: try: # if no DOB match, check if page 2 exists wait.until(EC.visibility_of_element_located(( By.XPATH, "//*[@id='MainContent_navRowTable']"))) check = driver.find_element(By.LINK_TEXT, '2') except NoSuchElementException: # if page 2 does not exist, restart loop sf.new_search(driver, wait) continue # if page 2 exists, go to page 2 page = driver.find_element(By.LINK_TEXT, '2').click() # check if there are DOB matches on page two try: check = driver.find_element(By.XPATH, "//tr[td='" + c + "']") except NoSuchElementException: try: # if no DOB match, check if page 3 exists wait.until(EC.visibility_of_element_located(( By.XPATH, "//*[@id='MainContent_navRowTable']"))) check = driver.find_element(By.LINK_TEXT, '3') except NoSuchElementException: # if page 3 does not exist, restart loop sf.new_search(driver, wait) continue # if page 3 exists, go to page 3 page = driver.find_element(By.LINK_TEXT, '3').click() # check if there are DOB matches on page 3 try: check = driver.find_element(By.XPATH, "//tr[td='" + c + "']") except NoSuchElementException: try: # if no DOB match, check if page 4 exists wait.until(EC.visibility_of_element_located(( By.XPATH, "//*[@id='MainContent_navRowTable']"))) check = driver.find_element(By.LINK_TEXT, '4') except NoSuchElementException: # if page 4 does not exist, restart loop sf.new_search(driver, wait) continue # if page 4 exists, go to page 4 page = driver.find_element(By.LINK_TEXT, '4').click() try: check = driver.find_element(By.XPATH, "//tr[td='" + c + "']") except NoSuchElementException: # if DOB match not found continue sf.new_search(driver, wait) continue #============================================================= #SCRAPE RESULT FROM PAGE 4 #============================================================= #click on name link = driver.find_element(By.XPATH, "//tr[td='" + c + "']/td/preceding-sibling::td[3]") link.click() ##Table 1 - "ParoleeInformation" dataframe = sf.scrape_table1(driver, dataframe) ##Table 2 - "SupervisionInformation" #######Check that Table 2 exists try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_supervisionInformation']/tbody/tr[1]/td[1]") except NoSuchElementException: try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_offenseInformationTable']/tbody/tr[2]/td[1]") except NoSuchElementException: # if neither supervision nor offense tables exist df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue #if offense table exists: dataframe = sf.scrape_table3(driver, dataframe, e) #export to csv: df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue ##If Table 2 (supervision info) exists, scrape: dataframe = sf.scrape_table2(driver, dataframe) ##Table 3 - "OffenseInformation" #######Check that Table 3 exists try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_offenseInformationTable']/tbody/tr[2]/td[1]") except NoSuchElementException: #export df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue #If Table 3 exists, scrape: dataframe = sf.scrape_table3(driver, dataframe, e) #####Done Scraping #export df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue #============================================================= #SCRAPE RESULT FROM PAGE 3 #============================================================= #click on name link = driver.find_element(By.XPATH, "//tr[td='" + c + "']/td/preceding-sibling::td[3]") link.click() ##Table 1 - "ParoleeInformation" dataframe = sf.scrape_table1(driver, dataframe) ##Table 2 - "SupervisionInformation" #######Check that Table 2 exists try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_supervisionInformation']/tbody/tr[1]/td[1]") except NoSuchElementException: try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_offenseInformationTable']/tbody/tr[2]/td[1]") except NoSuchElementException: # if neither supervision nor offense tables exist df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue #if offense table exists: dataframe = sf.scrape_table3(driver, dataframe, e) #export to csv: df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue ##If Table 2 (supervision info) exists, scrape: dataframe = sf.scrape_table2(driver, dataframe) ##Table 3 - "OffenseInformation" #######Check that Table 3 exists try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_offenseInformationTable']/tbody/tr[2]/td[1]") except NoSuchElementException: #export df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue #If Table 3 exists, scrape: dataframe = sf.scrape_table3(driver, dataframe, e) #####Done Scraping #export #export_name_df() df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue #============================================================= #SCRAPE RESULT FROM PAGE 2 #============================================================= #click on name link = driver.find_element(By.XPATH, "//tr[td='" + c + "']/td/preceding-sibling::td[3]") link.click() ##Table 1 - "ParoleeInformation" dataframe = sf.scrape_table1(driver, dataframe) ##Table 2 - "SupervisionInformation" #######Check that Table 2 exists try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_supervisionInformation']/tbody/tr[1]/td[1]") except NoSuchElementException: try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_offenseInformationTable']/tbody/tr[2]/td[1]") except NoSuchElementException: # if neither supervision nor offense tables exist #export_name_df() df_list.append(dataframe) # #Go back to home page sf.new_search(driver, wait) continue #if offense table exists: dataframe = sf.scrape_table3(driver, dataframe, e) #export to csv: df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue ##If Table 2 (supervision info) exists, scrape: dataframe = sf.scrape_table2(driver, dataframe) ##Table 3 - "OffenseInformation" #######Check that Table 3 exists try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_offenseInformationTable']/tbody/tr[2]/td[1]") except NoSuchElementException: #export df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue #If Table 3 exists, scrape: dataframe = sf.scrape_table3(driver, dataframe, e) #####Done Scraping #export df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue #============================================================= #SCRAPE RESULT FROM PAGE 1 #============================================================= #click on name link = driver.find_element(By.XPATH, "//tr[td='" + c + "']/td/preceding-sibling::td[3]") link.click() ##Table 1 - "ParoleeInformation" dataframe = sf.scrape_table1(driver, dataframe) ##Table 2 - "SupervisionInformation" #######Check that Table 2 exists try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_supervisionInformation']/tbody/tr[1]/td[1]") except NoSuchElementException: try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_offenseInformationTable']/tbody/tr[2]/td[1]") except NoSuchElementException: # if neither supervision nor offense tables exist #export_name_df() df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue #if offense table exists: dataframe = sf.scrape_table3(driver, dataframe, e) #export to csv: df_list.append(dataframe) #Go back to home page sf.new_search(driver, wait) continue ##If Table 2 (supervision info) exists, scrape: dataframe = sf.scrape_table2(driver, dataframe) ##Table 3 - "OffenseInformation" #######Check that Table 3 exists try: check = driver.find_element(By.XPATH, "//*[@id='MainContent_offenseInformationTable']/tbody/tr[2]/td[1]") except NoSuchElementException: #export df_list.append(dataframe) # find new search button by xpath sf.new_search(driver, wait) continue #If Table 3 exists, scrape: dataframe = sf.scrape_table3(driver, dataframe, e) #####Done Scraping #export df_list.append(dataframe) # find new search button by xpath sf.new_search(driver, wait) #============================================================================= # END OF LOOPS - CONCAT AND EXPORT AS CSV #============================================================================= #end loop; close driver driver.close() driver.quit() # concatenate all nysid dfs global full_output full_output = pd.concat(df_list) #Capitalize certain columns for col in ('Name:', 'Parole status:', 'Class 1', 'Class 2', 'Class 3', 'Class 4', 'Class 5', 'Class 6', 'Class 7', 'Class 8', 'Class 9', 'Class 10', 'County 1', 'County 2', 'County 3', 'County 4', 'County 5', 'County 6', 'County 7', 'County 8', 'County 9', 'County 10'): full_output[col] = full_output[col].fillna("") full_output[col] = full_output[col].astype(str).str.title() # Add column indicating day data was scraped full_output["Date Info Scraped:"] = today #today_str !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # #Turn dates into datetime objects # full_output[["Date of birth:", "Release to parole supervision:"]]= full_output[["Date of birth:", "Release to parole supervision:"]].astype('datetime64[ns]') full_output[["Date of birth:", "Release to parole supervision:", "Effective date:"]]= full_output[["Date of birth:", "Release to parole supervision:","Effective date:"]].astype('datetime64[ns]') # Add age column idob = full_output.columns.get_loc("Date of birth:") # get index location of DOB age = (today - full_output["Date of birth:"]) / np.timedelta64(1, 'Y') # get series of ages age = np.floor(age) # get actual age, no decimals full_output.insert(loc = idob +1, column = 'Age:', value = age) #insert new age column directly after DOB column # Add months since release column irel = full_output.columns.get_loc("Release to parole supervision:") # get index location of release date mrelease = (today - full_output["Release to parole supervision:"]) / np.timedelta64(1, 'M') # get series mrelease = np.around(mrelease, decimals=1, out=None) # round to 1 decimal full_output.insert(loc = irel +1, column = "Months Since Release:", value = mrelease) #insert new column directly after release date iconv = full_output.columns.get_loc("Crime of conviction 10") count_conv = full_output[['Crime of conviction 1', 'Crime of conviction 2', 'Crime of conviction 3', 'Crime of conviction 4', 'Crime of conviction 5', 'Crime of conviction 6', 'Crime of conviction 7', 'Crime of conviction 8', 'Crime of conviction 9', 'Crime of conviction 10']].apply(lambda x: x.notnull().sum(), axis='columns') full_output.insert(loc = iconv +1, column = "Total Convictions", value = count_conv) # insert total after last conviction column # make null IDs empty cells full_output['ID'] = np.where( full_output['ID'] == "nan", '', full_output['ID']) # make new dataframe for summary stats output_stats = full_output.copy() # change empoty spaces back to nans full_output = full_output.replace(r'^\s*$', np.nan, regex=True) #export to csv full_output.to_csv(output_dir + '/parole_full_output_'+ dyn_dir_name +'.csv', index=False, encoding="utf-8") #============================================================================= # SUMMARY STATISTICS #============================================================================= # summary stats on continuous variables global stats_numeric stats_numeric = full_output[["Age:", "Months Since Release:","Total Convictions"]].describe().loc[['count','min','mean','max']] # create age bins bins = [0,20,30,40,50,60,150] group_names=['0 - 20','21-30','31-40','41-50','51-60','60 +'] output_stats['age_bin']=pd.cut(output_stats['Age:'],bins,labels=group_names).astype(object) global age_freq age_freq = sf.freq_table(output_stats, "age_bin", "Age") # get frequency of unique individuals with cases in different counties county_list = ['County 1', 'County 2', 'County 3', 'County 4', 'County 5', 'County 6', 'County 7', 'County 8', 'County 9', 'County 10'] output_stats[county_list] = output_stats[county_list].fillna("") counties = np.unique(output_stats[county_list].values) counties = [item for item in counties if item != ""] global county_unique_freq county_unique_freq = pd.DataFrame(columns=['Count', '%'], index = counties) county_unique_freq.index.rename("Counties All Convictions", inplace = True) for county in counties: output_stats[county+'_unique'] = output_stats[['County 1', 'County 2', 'County 3', 'County 4', 'County 5', 'County 6', 'County 7', 'County 8', 'County 9', 'County 10']].apply( lambda x: x.str.contains(county ,case=False)).any(axis=1).astype(int) c = output_stats[county+'_unique'].sum() p = str(round((c/len(output_stats.index))*100, 1))+ '%' county_unique_freq.loc[county].Count = c county_unique_freq.loc[county]['%'] = p county_unique_freq.reset_index(inplace = True) # frequency tables for race/ethnicity global race_freq race_freq = sf.freq_table(output_stats, "Race / ethnicity:", "Race/Ethnicity") # frequency tables for parole status global pstatus_freq pstatus_freq = sf.freq_table(output_stats, "Parole status:", "Parole Status") # get counts of charges conv_list = ['Crime of conviction 1', 'Crime of conviction 2', 'Crime of conviction 3', 'Crime of conviction 4', 'Crime of conviction 5', 'Crime of conviction 6', 'Crime of conviction 7', 'Crime of conviction 8', 'Crime of conviction 9', 'Crime of conviction 10'] output_stats[conv_list] = output_stats[conv_list].fillna("") convictions = np.unique(output_stats[conv_list].values) convictions = [item for item in convictions if item != ""] global convictions_freq convictions_freq = pd.DataFrame(columns=['Count', '%'], index = convictions) convictions_freq.index.rename("Charges All Convictions", inplace = True) for conv in convictions: output_stats[conv+'_unique'] = output_stats[['Crime of conviction 1', 'Crime of conviction 2', 'Crime of conviction 3', 'Crime of conviction 4', 'Crime of conviction 5', 'Crime of conviction 6', 'Crime of conviction 7', 'Crime of conviction 8', 'Crime of conviction 9', 'Crime of conviction 10']].apply( lambda x: x.str.contains(conv ,case=False)).any(axis=1).astype(int) c = output_stats[conv+'_unique'].sum() p = str(round((c/len(output_stats.index))*100, 1))+ '%' convictions_freq.loc[conv].Count = c convictions_freq.loc[conv]['%'] = p convictions_freq.reset_index(inplace = True) #get top charge (crime of conviction #1) fequency table global top_charge_freq top_charge_freq = sf.freq_table(output_stats, 'Crime of conviction 1', "Top Charge") ##### Export -------------------------------------------------------------- # Create a Pandas Excel writer writer = pd.ExcelWriter(output_dir + '/summary_statistics_'+ dyn_dir_name + '.xlsx', engine='xlsxwriter') # Write each dataframe to a different worksheet. stats_numeric.to_excel(writer, sheet_name='Summary Statistics', index = False) age_freq.to_excel(writer, sheet_name='Age', index = False) race_freq.to_excel(writer, sheet_name='Race.Ethnicity', index = False) pstatus_freq.to_excel(writer, sheet_name='Parole Status', index = False) county_unique_freq.to_excel(writer, sheet_name='Unique People per County', index = False) top_charge_freq.to_excel(writer, sheet_name='Top Charge', index = False) convictions_freq.to_excel(writer, sheet_name='Unique People per Charge Type', index = False) # Close the Pandas Excel writer and output the Excel file. writer.save() #create list with stats dataframes stats_list = [stats_numeric, race_freq, age_freq, pstatus_freq, county_unique_freq, top_charge_freq, convictions_freq] return full_output, stats_list