I have 100 CSV files. I want to print particular columns from all of the CSV files along with the file name. With the code below I can print all of the CSV files.
# Read every CSV file in the folder into a dict keyed by file name
# (without the .csv extension), printing each name as it is loaded.
path = r'F:\11 semister\TPC_MEMBER'
all_files = glob.glob(path + "/*.csv")

dataStorage = {}
for filename in all_files:
    # Strip the directory and the ".csv" extension to get a clean key.
    name = os.path.basename(filename).split(".csv")[0]
    dataStorage[name] = pd.read_csv(filename)
    print(name)

dataStorage  # bare expression: displays only in a notebook/REPL; use print() in a script
Maybe you want this:
import pandas as pd
import numpy as np
import glob

# Concatenate every CSV in the folder into one DataFrame, then print one column.
path = r'folderpath'  # provide your folder path where your csv files are stored
all_csv = glob.glob(path + "/*.csv")

frames = []
for filename in all_csv:
    frames.append(pd.read_csv(filename, index_col=None, header=0))

# Stack all per-file frames vertically; ignore_index renumbers the rows 0..n-1.
data_frame = pd.concat(frames, axis=0, ignore_index=True)
# The original selected the column and discarded the result; print it instead.
print(data_frame['columnname'])  # enter the name of your dataframe's column
Related
How can I select the last rows in text files with a for loop?
This is my first idea for the code:
import glob
import pandas as pd

# Read every .txt file in the folder, keep only the last rows of each,
# and write the combined result to a single Excel workbook.
# Fixes: stray ']' (syntax error), no-op slice `txt_list[-7::3]`, and a
# second loop that did nothing.
path = input("Insert location:")
file_list = glob.glob(path + "/*.txt")

frames = []
for file in file_list:
    df = pd.read_csv(file)
    frames.append(df.tail(3))  # keep only the last 3 rows of this file

excl_merged = pd.concat(frames, ignore_index=True)
excl_merged.to_excel('Total.xlsx', index=False)
Your code is incorrect. Here is a version that should work:
import glob
import pandas as pd

# Ask for a folder, read every .txt file in it, keep the last 3 rows of
# each file, and write the combined rows to Total.xlsx.
path = input("Insert location:")
file_list = glob.glob(path + "/*.txt")

df_list = []
for file in file_list:
    df = pd.read_csv(file)
    df_list.append(df.tail(3))  # last 3 rows from each file dataframe

excl_merged = pd.concat(df_list, ignore_index=True)
excl_merged.to_excel('Total.xlsx', index=False)
Explanation: the tail() method takes the last several rows (the count is provided as an argument) from a dataframe.
I am trying to merge different CSV files in Python. The files are in the same folder. All the files have one column in common, 'client_ID'. I tried this code:
# Merge every CSV in the folder on a shared key column.
# Bugs fixed: the loop started from an EMPTY DataFrame (so every merge
# produced an empty result) and the merge result was never assigned back,
# so each iteration's work was discarded.
path = r'/folder_path/'
allfiles = glob.glob(path + "/*.csv")

df = None
for file in allfiles:
    df_file = pd.read_csv(file)
    if df is None:
        df = df_file  # seed with the first file instead of an empty frame
    else:
        # NOTE(review): the question says the shared column is 'client_ID',
        # but the code merges on 'partner_id' — confirm the key name.
        df = pd.merge(df, df_file, on='partner_id')

df  # bare expression: displays only in a notebook/REPL; use print(df) in a script
You can read the first csv file first so that you don't start with an empty dataframe. I would edit your code like this:
# Merge all CSVs in the folder on 'partner_id', seeding the running
# DataFrame with the first file so the merge never starts from empty.
path = r'/folder_path/'
allfiles = glob.glob(path + "/*.csv")

for i, file in enumerate(allfiles):
    if i == 0:
        # The first file becomes the base DataFrame.
        df = pd.read_csv(file)
    else:
        df_file = pd.read_csv(file)
        df = pd.merge(df, df_file, on='partner_id')

df  # bare expression: displays only in a notebook/REPL; use print(df) in a script
This is what I have so far. I need to combine 3 files from my google drive to one. I do not get an error with this code, but it only imports 1 file.
import pandas as pd
import glob

# Combine every CSV in the Drive folder into a single DataFrame.
# Fix: the loop body was not indented, so read/append never ran per file —
# which is why only one file appeared to be imported.
path = '/content/gdrive/My Drive/Colab Datasets/'
all_files = glob.glob(path + "/*.csv")

li = []
for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)

# Stack all per-file frames vertically; ignore_index renumbers the rows.
frame = pd.concat(li, axis=0, ignore_index=True)
Simply by following this code:
import os
import glob
import pandas as pd

# Combine every *.csv in the working directory into one CSV file.
# NOTE(review): the path has no leading '/' — in Colab the mount is usually
# '/content/gdrive/...'; confirm the mount point before running.
os.chdir("content/gdrive/My Drive/Colab Datasets")
extension = 'csv'
# glob.glob already returns a list; no need to copy it via a comprehension.
all_filenames = glob.glob('*.{}'.format(extension))
# combine all files in the list
combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])
# export to csv; utf-8-sig writes a BOM so Excel detects the encoding
combined_csv.to_csv("combined_csv.csv", index=False, encoding='utf-8-sig')
I have about 200 CSV files in a folder and some of the columns are not named consistently — for instance there is 'App name', 'App Name' and 'app name' — so I want to rename these columns to a standard name like 'App Name' and concatenate all the CSVs into one CSV file.
That would work:
import glob
import os
import pandas as pd

# Gather every CSV in the folder, skip each file's own header row while
# reading, stack the frames, and apply one standard set of column labels.
folder = 'the folder path'
pattern = folder + os.path.sep + '*.csv'
filenames = list(glob.glob(pattern))

frames = []
for f in filenames:
    frames.append(pd.read_csv(f, skiprows=1))

combined_csv = pd.concat(frames)
combined_csv.columns = ["all the header labels"]
import glob
import os
import pandas as pd

# For every CSV in the folder: rename a column and re-save the file under
# the same base name in the target directory.
csv_folder = '/Users/yourname/folder_containing_csv/*.csv'

# Iterate the glob results directly instead of collecting paths and
# indexing with range(len(...)).
for csv_path in glob.glob(csv_folder):
    df = pd.read_csv(csv_path, index_col=0)
    df = df.rename(columns={"old_name": "new_name"})
    # File name without directory or extension; os.path handles separators
    # portably, unlike splitting on '/' and '.csv' by hand.
    file_name = os.path.splitext(os.path.basename(csv_path))[0]
    # save to .csv using the same file name
    df.to_csv('/Users/yourname/%(file_name)s.csv' % {'file_name': file_name})
I am reading in multiple files and adding them to a list:
import pandas as pd
import glob
import ntpath

# Load every .dat file in the folder into a list of DataFrames.
path = r'C:\Folder1\Folder2\Folder3\Folder3'
all_files = glob.glob(path + "/*.dat")  # .dat files only

mylist = []
for filename in all_files:
    name = ntpath.basename(filename)  # for renaming the DF
    name = name.replace('.dat', '')   # remove extension
    try:
        # Use a separate variable so the DataFrame does not clobber the
        # file-name string held in `name`.
        df = pd.read_csv(filename, sep='\t', engine='python')
        mylist.append(df)
    except Exception:
        # Narrowed from a bare except; report which file failed (the
        # original f-string had no placeholder).
        print(f'File not read: {filename}')
Now I want to just display the DFs in this list.
This is what I've tried:
# Attempt to print a .name attribute of each DataFrame — this raises
# AttributeError, because DataFrame objects have no 'name' attribute.
for thing in mylist:
    print(thing.name)
AttributeError: 'DataFrame' object has no attribute 'name'
And
# Printing each DataFrame itself prints its full contents, not a name.
for item in mylist:
    print(item)
But that just prints the whole DF content.
# 'name' is rebound here from the file-name string to the DataFrame,
# so the original name is lost — this is the bug under discussion.
name = pd.read_csv(filename, sep='\t', engine='python')
mylist.append(name)
Here, name is a dataframe, not the name of your dataframe.
To add name to your dataframe, use
# Keep the DataFrame and its display name together in one dict entry.
df = pd.read_csv(filename, sep='\t', engine='python')
df_name = "Sample name"
mylist.append({'data': df, 'name': df_name})
>>> print(thing['name'])
Sample name
You can use a dictionary for that.
Writing to dict:
import pandas as pd
import glob
import ntpath

# Map each file's base name (without extension) to its DataFrame so a
# frame can be looked up again by name later.
path = r'C:\Folder1\Folder2\Folder3\Folder3'
all_files = glob.glob(path + "/*.dat")  # .dat files only

mydict = {}
for filename in all_files:
    name = ntpath.basename(filename)  # for renaming the DF
    name = name.replace('.dat', '')   # remove extension
    try:
        mydict[name] = pd.read_csv(filename, sep='\t', engine='python')
    except Exception:
        # Narrowed from a bare except; report which file failed (the
        # original f-string had no placeholder).
        print(f'File not read: {filename}')
To read a df (say filename1) again:
df = mydict['filename1']  # look up one stored DataFrame by its file name
or to iterate over all df's in mydict:
# Iterate over the stored DataFrames only (keys not needed).
for df in mydict.values():
    # use df...
or:
# Walk names and frames together: print each key, then work with its frame.
for key, df in mydict.items():
    print(key)
    # use df...