# Sei sulla pagina 1 di 2  -- web-page residue ("You are on page 1 of 2"); not part of the script.

import pandas as pd

import os

#Required variables
fileNamePart = "_2g_kpi"  # substring an input file name must contain (alternative: "3gsite")
fileExtension = ".csv"    # exact (case-sensitive) extension an input file must have
headerLine = 8            # 1-based line number of the column-header row in every input file

saveFileName = fileNamePart + '_allInOne.csv'  # Name of the merged csv file.
summaryFileName = fileNamePart + '_allInOne_Summary.csv'  # Name of the summary sheet of the merged csv file.


def merge_csv_files(name_part, extension, header_line, merged_name,
                    summary_name, directory="."):
    """Merge every matching CSV file in *directory* into one CSV plus a summary.

    A file is an input when its base name contains *name_part* and its
    extension equals *extension*. The first ``header_line - 1`` lines of each
    input are skipped; the next line is taken as the column header.

    Two files are (re)created inside *directory*:

    * *merged_name*: header of the first input followed by the data rows of
      all inputs. Rows of later inputs are appended positionally; their
      headers are not checked against the first one.
    * *summary_name*: one row per input, holding the file name, its number of
      data rows and the raw cells of its header row.

    Args:
        name_part: Substring that must occur in an input file's base name.
        extension: Required file extension, including the leading dot.
        header_line: 1-based line number of the header row in each input.
        merged_name: File name of the merged output.
        summary_name: File name of the summary output.
        directory: Folder that is scanned and written to.

    Returns:
        A ``(files_merged, total_rows)`` tuple.
    """
    merged_path = os.path.join(directory, merged_name)
    summary_path = os.path.join(directory, summary_name)

    # Delete the outputs of a previous run so they are neither appended to
    # nor picked up as inputs (their names match the input filter).
    if os.path.isfile(merged_path):
        os.remove(merged_path)
        print("merged file deleted.")
    if os.path.isfile(summary_path):
        os.remove(summary_path)
        print("summary sheet deleted.")

    # os.listdir() returns names in arbitrary order; sort them so the merged
    # output is reproducible between runs and machines.
    candidates = sorted(
        entry for entry in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, entry))
        and entry not in (merged_name, summary_name)
    )

    merged_count = 0
    total_rows = 0
    for entry in candidates:
        stem, ext = os.path.splitext(entry)
        if name_part not in stem or ext != extension:
            continue

        source_path = os.path.join(directory, entry)

        # Raw header cells, read without letting pandas interpret (and
        # possibly rename duplicate) column names.
        summary_header_df = pd.read_csv(
            source_path, skiprows=header_line - 1, header=None, nrows=1)

        # Data rows; pandas treats the first non-skipped row as the header.
        read_df = pd.read_csv(source_path, skiprows=header_line - 1)
        rows = read_df.shape[0]  # the header is not counted as a row

        # Summary row layout: (file name, number of rows, header cells...).
        summary_df = pd.concat(
            [pd.DataFrame([entry]), pd.DataFrame([rows]), summary_header_df],
            axis=1)

        # The first input creates both outputs and contributes the header of
        # the merged file; every later input is appended without a header.
        is_first = merged_count == 0
        write_mode = "w" if is_first else "a"
        read_df.to_csv(merged_path, mode=write_mode, header=is_first,
                       index=False)
        summary_df.to_csv(summary_path, mode=write_mode, header=False,
                          index=False)

        merged_count += 1
        # Console output: "<serial> . <file name> : <n> rows"
        print(str(merged_count), ".", entry, ": %d rows" % rows)
        total_rows += rows

    print("Total rows: " + str(total_rows))
    return merged_count, total_rows


# Run the merge on the current working directory, exactly as the flat script did.
merge_csv_files(fileNamePart, fileExtension, headerLine,
                saveFileName, summaryFileName)

print("Done!")

# Potrebbero piacerti anche  -- web-page residue ("You might also like"); not part of the script.