Delete the first column from a csv file in Python [duplicate]

Delete the first column from a csv file in Python [duplicate] - python

This question already has an answer here:
Delete specific columns from csv file in python3
(1 answer)
Closed 4 years ago.
I have the following code in Python to delete the first row from csv files in the folder in_folder and then save them in the folder out_folder. Now I need to delete the first column of a csv file. Does anyone know how to do this?
import csv
import glob
import os
import shutil
path = 'in_folder/*.csv'
files=glob.glob(path)
#Read every file in the directory
x = 0 #counter
for filename in files:
with open(filename, 'r') as fin:
data = fin.read().splitlines(True)
with open(filename, 'w') as fout:
fout.writelines(data[1:])
x+=1
print(x)
dir_src = "in_folder"
dir_dst = "out_folder"
for file in os.listdir(dir_src):
if x>0:
src_file = os.path.join(dir_src, file)
dst_file = os.path.join(dir_dst, file)
shutil.move(src_file, dst_file)

What you can do is to use Pandas as it can achive DataFrame manipulation.
file.csv
1,2,3,4,5
1,2,3,4,5
1,2,3,4,5
Your code should look like
import pandas as pd
df = pd.read_csv('file.csv')
# If you know the name of the column skip this
first_column = df.columns[0]
# Delete first
df = df.drop([first_column], axis=1)
df.to_csv('file.csv', index=False)
file.csv
2,3,4,5
2,3,4,5
2,3,4,5

Related

How to convert multiple json files to cvs files

Hello I have multiple json files in a path and I want to convert all of them to csv files separately. Here is what I have tried so far which just convert one json file to a csv file.
with open('/Users/hh/MyDataSet/traceJSON-663-661-A0-25449-7.json') as f:
for line in f:
data.append(json.loads(line))
csv_file=open('/Users/hh/MyDataSet/GTruth/traceJSON-663-661-A0-25449-7.csv','w')
write=csv.writer(csv_file)
# write.writerow(["row number","type","rcvTime","pos_x","pos_y","pos_z","spd_x","spd_y","spd_z","acl_x","acl_y","acl_z"
# ,"hed_x","hed_y","hed_z"])
write.writerow(["row number","type","rcvTime","sender","pos_x","pos_y","pos_z","spd_x","spd_y","spd_z","acl_x","acl_y","acl_z"
,"hed_x","hed_y","hed_z"])
for elem in range(len(data)):
if data[elem]['type']==2:
write.writerow([elem,data[elem]['type'],round(data[elem]['rcvTime'],2),'663',round(data[elem]['pos'][0],2),round(data[elem]['pos'][1],2)
,round(data[elem]['pos'][2],2),round(data[elem]['spd'][0],2),round(data[elem]['spd'][1],2),round(data[elem]['spd'][2],2),
round(data[elem]['acl'][0],2),round(data[elem]['acl'][1],2),round(data[elem]['acl'][2],2),round(data[elem]['hed'][0],2),
round(data[elem]['hed'][1],2),round(data[elem]['hed'][2],2)])
elif data[elem]['type']==3:
write.writerow([elem,data[elem]['type'],round(data[elem]['rcvTime'],2),round(data[elem]['sender'],2),round(data[elem]['pos'][0],2),round(data[elem]['pos'][1],2)
,round(data[elem]['pos'][2],2),round(data[elem]['spd'][0],2),round(data[elem]['spd'][1],2),round(data[elem]['spd'][2],2),
round(data[elem]['acl'][0],2),round(data[elem]['acl'][1],2),round(data[elem]['acl'][2],2),round(data[elem]['hed'][0],2),
round(data[elem]['hed'][1],2),round(data[elem]['hed'][2],2)])
# json_file.close()
print('done!')
csv_file.close()
I appreciate if anyone can help me how can I do it. Also in each json file name "traceJSON-663-661-A0-25449-7", the first number like in the above code (663) should be written in csv file like the following code,if the type is 2:
write.writerow([elem,data[elem]['type'],round(data[elem]['rcvTime'],2),'663',....
My json file names are like traceJSON-51-49-A16-25217-7, traceJSON-57-55-A0-25223-7, ....

I suggest using pandas for this:
from glob import glob
import pandas as pd
import os
filepaths = glob('/Users/hh/MyDataSet/*.json') # get list of json files in folder
for f in filepaths:
filename = os.path.basename(f).rsplit('.', 1)[0] # extract filename without extension
nr = int(filename.split('-')[1]) # extract the number from the filename - assuming that all filenames are formatted similarly, use regex otherwise
df = pd.read_json(f) # read the json file as a pandas dataframe, assuming the json file isn't nested
df['type'] = df['type'].replace(2, nr) # replace 2 in 'type' column with the number in the filename
df.to_csv(f'{filename}.csv') # save as csv
If you want to round columns, you can also do this with pandas

import csv
import glob
import json
import os.path
for src_path in glob.glob('/Users/hh/MyDataSet/*.json'):
src_name = os.path.splitext(os.path.basename(src_path))[0]
data = []
with open(src_path) as f:
for line in f:
data.append(json.loads(line))
dest_path = '/Users/hh/MyDataSet/GTruth/' + src_name + '.csv'
csv_file=open(dest_path,'w')
write=csv.writer(csv_file)
write.writerow(["row number","type","rcvTime","sender","pos_x","pos_y","pos_z","spd_x","spd_y","spd_z","acl_x","acl_y","acl_z"
,"hed_x","hed_y","hed_z"])
for elem in range(len(data)):
if data[elem]['type']==2:
sender = src_name.split('-')[1]
write.writerow([elem,data[elem]['type'],round(data[elem]['rcvTime'],2),sender,round(data[elem]['pos'][0],2),round(data[elem]['pos'][1],2)
,round(data[elem]['pos'][2],2),round(data[elem]['spd'][0],2),round(data[elem]['spd'][1],2),round(data[elem]['spd'][2],2),
round(data[elem]['acl'][0],2),round(data[elem]['acl'][1],2),round(data[elem]['acl'][2],2),round(data[elem]['hed'][0],2),
round(data[elem]['hed'][1],2),round(data[elem]['hed'][2],2)])
elif data[elem]['type']==3:
write.writerow([elem,data[elem]['type'],round(data[elem]['rcvTime'],2),round(data[elem]['sender'],2),round(data[elem]['pos'][0],2),round(data[elem]['pos'][1],2)
,round(data[elem]['pos'][2],2),round(data[elem]['spd'][0],2),round(data[elem]['spd'][1],2),round(data[elem]['spd'][2],2),
round(data[elem]['acl'][0],2),round(data[elem]['acl'][1],2),round(data[elem]['acl'][2],2),round(data[elem]['hed'][0],2),
round(data[elem]['hed'][1],2),round(data[elem]['hed'][2],2)])
csv_file.close()
print('done!')

How to merge multiple excel file via python

I have 30 excel files and want to combine them all into only 1 excel file (using Python) and i want only 1 header on top of file ( not keep happening 30 times)
dont know how to write in python
please help.
thank you so much

This is my code snippet for merging 13 excel file in to 1 file.
fout=open("1300 restaurant data.csv", "a", encoding="utf8")
# now the rest:
for num in range(1,13):
f = open(str(num)+".csv",encoding="utf8")
for line in f:
fout.write(line)
f.close() # not really needed
fout.close()

Try using the below code to merge a list of files into a single file.
import glob
import pandas as pd
path = "C:/documents"
file_list = glob.glob(path + "/*.xlsx")
excel_list = []
for file in excel_list:
excel_list.append(pd.read_excel(file))
excel_merged = pd.DataFrame()
for excel_file in excel_list:
excel_merged = excel_merged.append(
excel_file, ignore_index=True)
excel_merged.to_excel('mergedFile.xlsx', index=False)

How to combine horizontally many CSV files using python csv or pandas module?

Hello!
I would like to combine horizontally many CSV files (the total number will oscillate around 120-150) into one CSV file by adding one column from each file (in this case column called “grid”). All those files have the same columns and number of the rows (they are constructed the same) and are stored in the same catalogue. I’ve tried with CSV module and pandas. I don't want to define all 120 files. I need a script to do it automatically. I’m stuck and I have no ideas...
Some input CSV files (data) and CSV file (merged) which I would like to get:
https://www.dropbox.com/transfer/AAAAAHClI5b6TPzcmW2dmuUBaX9zoSKYD1ZrFV87cFQIn3PARD9oiXQ
That's how my code looks like when I use the CSV module:
import os
import glob
import csv
os.chdir('\csv_files_direction')
extension = 'csv'
files = [i for i in glob.glob('*.{}'.format(extension))]
out_merg = ('\merged_csv_file_direction')
with open(out_merg,'wt') as out:
writer = csv.writer(out)
for file in files:
with open(file) as csvfile:
data = csv.reader(csvfile, delimiter=';')
result = []
for row in data:
a = row[3] #column which I need
result.append(a)
Using this code I receive values only from the last CSV. The rest is missing. As a result I would like to have one precise column from each CSV file from the catalogue.
And Pandas:
import os
import glob
import pandas as pd
import csv
os.chdir('\csv_files_direction')
extension = 'csv'
files = [i for i in glob.glob('*.{}'.format(extension))]
out_merg = ('\merged_csv_file_direction')
in_names = [pd.read_csv(f, delimiter=';', usecols = ['grid']) for f in files]
Using pandas I receive data from all CSV's as the list which can be navigated using e.g in_names[1].
I confess that this is my first try with pandas and I don't have ideas what should be my next step.
I will really appreciate any help!
Thanks in advance,
Mateusz

For the part of CSV i think you need another list define OUTSIDE the loop.
Something like
import os
import sys
dirname = os.path.dirname(os.path.realpath('__file__'))
import glob
import csv
extension = 'csv'
files = [i for i in glob.glob('*.{}'.format(extension))]
out_merg = ('merged_csv_file_direction')
result= []
with open(out_merg,'wt') as out:
writer = csv.writer(out)
for file in files:
with open(file) as csvfile:
data = csv.reader(csvfile, delimiter=';')
col = []
for row in data:
a = row[3] #column which I need
col.append(a)
result.append((col))
NOTE: I have also changed the way to go into the folder. Now you can run the file direcly in the folder that contains the 2 folders (one for take the data and the other to save the data)
Regarding the part of pandas
you can create a loop again. This time you need to CONCAT the dataframes that you have created using in_names = [pd.read_csv(f, delimiter=';', usecols = ['grid']) for f in files]
I think you can use
import os
import glob
import pandas as pd
import csv
os.chdir('\csv_files_direction')
extension = 'csv'
files = [i for i in glob.glob('*.{}'.format(extension))]
out_merg = ('\merged_csv_file_direction')
in_names = [pd.read_csv(f, delimiter=';', usecols = ['grid']) for f in files]
result = pd.concat(in_names)
Tell me if it works

Concatenate csv files in python by ascending order of filenames

I need to concatenate csv files with same column headers in python. The csv files with the following filenames should concatenate in order as shown below(ascending order of filename):
AB201602.csv
AB201603.csv
AB201604.csv
AB201605.csv
AB201606.csv
AB201607.csv
AB201608.csv
AB201610.csv
AB201612.csv
I would like to keep the column headers only from first file. Any idea?
I tried to use the below code and it combined the csv file by random filenames and truncated half of the column header name. thanks
csvfiles = glob.glob('/home/c/*.csv')
wf = csv.writer(open('/home/c/output.csv','wb'),delimiter = ',')
for files in csvfiles:
rd = csv.reader(open(files,'r'),delimiter = ',')
rd.next()
for row in rd:
print(row)
wf.writerow(row)

Using #Gokul comment and pandas.
import pandas as pd
import glob
csvfiles = sorted(glob.glob('/home/c/*.csv'))
df = pd.DataFrame()
for files in csvfiles:
df = df.append(pd.read_csv(files))
df.to_csv('newfile.csv')

Breaking up large file, but adding header to each subsequent file

I'm using the following code to break up a large CSV file and I want the original CSV header to be written to each smaller CSV file. The problem I am having, though, is that the current code seems to skip a line of data for each smaller file. So in the example below Line 51 wouldn't be written to the smaller file (code modified from http://code.activestate.com/recipes/578045-split-up-text-file-by-line-count/). It seems to skip that line or perhaps it's being overwritten by the header:
import os
filepath = 'test.csv'
lines_per_file=50
lpf = lines_per_file
path, filename = os.path.split(filepath)
with open(filepath, 'r') as r:
name, ext = os.path.splitext(filename)
try:
w = open(os.path.join(path, '{}_{}{}'.format(name, 0, ext)), 'w')
header = r.readline()
for i, line in enumerate(r):
if not i % lpf:
#possible enhancement: don't check modulo lpf on each pass
#keep a counter variable, and reset on each checkpoint lpf.
w.close()
filename = os.path.join(path,
'{}_{}{}'.format(name, i, ext))
w = open(filename, 'w')
w.write(header)
w.write(line)
finally:
w.close()

Consider using pandas to split the large csv file:
Lets create a csv file having 500 rows and four columns using pandas:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(500,4), columns=['a','b','c','d'])
df.to_csv('large_data.csv', index=False)
Lets split the large_data.csv in to multiple csv files of each having 50 rows:
import pandas as pd
df = pd.read_csv('large_data.csv', chunksize=50)
i = 1
for chunk in df:
chunk.to_csv('split_data_'+str(i)+'.csv', index=False)
i = i+1
This would have produced the following resultant files:

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Delete the first column from a csv file in Python [duplicate] - python

Related

How to convert multiple json files to cvs files

How to merge multiple excel file via python

How to combine horizontally many CSV files using python csv or pandas module?

Concatenate csv files in python by ascending order of filenames

Breaking up large file, but adding header to each subsequent file

Categories

Resources