Python Pandas: Doing loop for every file in directory - python

So I have code which works for one particular file in directory.
I want to make a loop which will do the following but for every .csv file in directory
1) Open file 2) Add one column 3) Save file to new location
My code
import pandas as pd
import os
import glob
plik = pd.read_csv('C:\Python\zrodlo\CSCO.csv', delimiter=";")
plik['Change'] = ((plik['Close'] - plik['Open'])/plik['Open']*100)
plik.to_csv('C:\Python\zrodlo\csv_nowe_pliki\ew_file.csv')
Lines which might come in handy for the loop. I did not know how to make good use of them
os.chdir('C:\Python\zrodlo')
print(os.getcwd())
for filename in os.listdir('C:\Python\zrodlo'):
if filename.endswith(".csv"):
print(filename)
2)
path = "C:\Python\zrodlo\*.csv"
for fname in glob.glob(path):
print(fname)```
Thank you for your input
EDIT, Question is what to put into last line for loop to save multiple files?
import pandas as pd
import os
import glob
path = "C:\Python\zrodlo\*.csv"
for fname in glob.glob(path):
print(fname)
plik = pd.read_csv(fname, delimiter=";")
plik['Change'] = ((plik['Close'] - plik['Open']) / plik['Open'] * 100)
plik.to_csv('C:\Python\zrodlo\csv_nowe_pliki\ew_file.csv')
EDIT, SOLUTION
import pandas as pd
import os
import glob
# Add a percentage 'Change' column to every .csv file in source_dir and save
# the result under the same file name in target_dir.
# Raw strings keep the Windows backslashes literal, and absolute paths built
# with os.path.join make switching the working directory unnecessary.
source_dir = r'C:\Python\zrodlo'
target_dir = os.path.join(source_dir, 'csv_nowe_pliki')
for filename in os.listdir(source_dir):
    if not filename.endswith(".csv"):
        continue
    print(filename)
    plik = pd.read_csv(os.path.join(source_dir, filename), delimiter=";")
    # Percentage change between the 'Open' and 'Close' columns.
    plik['Change'] = (plik['Close'] - plik['Open']) / plik['Open'] * 100
    # Same file name, different folder, so the source file is never overwritten.
    plik.to_csv(os.path.join(target_dir, filename))

Is this what you want?
import pandas as pd
import os
import glob
# Add a percentage 'Change' column to every .csv file in source_dir and write
# each result to target_dir.
# Raw strings are required here: in a normal string literal a trailing
# backslash escapes the closing quote and the line no longer parses.
source_dir = r'C:\Python\zrodlo'
target_dir = os.path.join(source_dir, 'csv_nowe_pliki')
for filename in os.listdir(source_dir):
    if not filename.endswith(".csv"):
        continue
    print(filename)
    plik = pd.read_csv(os.path.join(source_dir, filename), delimiter=";")
    plik['Change'] = (plik['Close'] - plik['Open']) / plik['Open'] * 100
    # Reuse the input file name for the output; a fixed output name would be
    # overwritten on every iteration. to_csv expects a single path, not the
    # list that glob.glob() returns.
    plik.to_csv(os.path.join(target_dir, filename))

Related

Want to add filename to a list in Streamlit then convert to dataframe

I want to add the filename to the following dataframe but my code isn't working:
import pandas as pd
import numpy as np
import streamlit as st
from tabula.io import read_pdf
import os
import glob
# Title
st.title('PDF to Excel')
files = st.file_uploader('Upload all orders',accept_multiple_files=True)
if files:
for file in files:
file.seek(0)
files_read = [read_pdf(file,pages='all')[0] for file in files]
st.write(type(files_read))
df = pd.concat(files_read)
df["PO"] = file.name
It only adds the first file's name (there are two files in this instance).
Any help please?
Picture here
Figured it out eventually!
import pandas as pd
import numpy as np
import streamlit as st
from tabula.io import read_pdf
import os
import glob
# Title
st.title('PDF to Excel')
files = st.file_uploader('Upload all orders',accept_multiple_files=True)
if files:
    for file in files:
        st.write("Filename: ", file.name)
        # Rewind every upload (not only the last one) before it is parsed.
        file.seek(0)
    filenames = [file.name for file in files]
    # Keep the first table that read_pdf extracts from each uploaded PDF.
    files_read = [read_pdf(file, pages='all')[0] for file in files]
    # Tag each table with its own file name *before* concatenating, so every
    # row keeps the name of the file it came from.
    for dataframe, filename in zip(files_read, filenames):
        dataframe['filename'] = filename
    st.write(type(files_read))
    df = pd.concat(files_read)

read in csv files from a folder and create html files

I'm new to Python and hoping for some help reading CSV files from a folder and converting each file to an HTML file. This is what I have so far:
import pandas as pd
import os
import glob
path = "htmlplots"
csv_files = glob.glob(os.path.join(path, "*.csv"))
for file in csv_files:
# read the csv file
df = pd.read_csv(file)
# print the filename
print('File Name:', file.split("\\")[-1])
# print the content
display(df)
Ideally I then need to create html files from the resulting csv files that have a 'next' and 'previous' link from one to two, two to three (next) and three to two, two to one (previous).
Use:
import pandas as pd
import os
import glob
# Convert every CSV in `path` to an HTML table that carries two extra rows,
# 'prev' and 'next', linking to the neighbouring files in the list.
path = ""
csv_files = glob.glob(os.path.join(path, "*.csv"))
last_index = len(csv_files) - 1
for i, file in enumerate(csv_files):
    df = pd.read_csv(file, header=None)
    # The first file has no predecessor and the last has no successor, so
    # those rows are left blank.
    df.loc['prev', :] = f'http://{csv_files[i - 1]}' if i > 0 else ''
    df.loc['next', :] = f'http://{csv_files[i + 1]}' if i != last_index else ''
    df.to_html(f"{file}.html", render_links=True)
Input csv file:
Output html:

Renaming multiple csv files within a folder in Python

I have a folder with 50 .csv files. The .csv files are auto-generated and a results/ output from a process-based model (long and automatically named). For example, sandbox_username_vetch_scaleup_IA_1.csv; sandbox_username_vetch_scaleup_IA_2.csv, and it continues till sandbox_username_vetch_scaleup_IA_50.csv.
I am trying to shorten the file names so that the files are named IA_1, IA_2 ... up to IA_50, and subsequently the new .csv file name gets added as a column to the data frame. Here is what I have tried so far:
# import necessary libraries
import pandas as pd
import os
import glob
import sys
from pathlib import Path
import re
data_p = "/Users/Username/Documents/HV_Scale/CWAD"
output_p = "/Users/Username/Documents/HV_Scale/CWAD"
retval = os.getcwd()
print (retval) # see in which folder you are
os.chdir(data_p) # move to the folder with your data
os.getcwd()
filenames = sorted(glob.glob('*.csv'))
fnames = list(filenames) # get the names of all your files
#print(fnames)
#Loop over
for f in range(len(fnames)):
print(f'fname: {fnames[f]}\n')
pfile = pd.read_csv(fnames[f], delimiter=",") # read in file
#extract filename
filename = fnames[f]
parts = filename.split(".") # giving you the number in file name and .csv
only_id = parts[0].split("_") # if there is a bracket included
# get IA from your file
filestate = pfile["IA"][0] # assuming this is on the first row
filestate = str(filestate)
# get new filename
newfilename = only_id[0]+"-"+filestate+parts[1]
# save your file (don't put a slash at the end of your directories on top)
pfile.to_csv(output_p+"/"+newfilename, index = False, header = True)
Here is the code for adding the csv file name as a column
import glob
import os
import shutil
import sys
import pandas as pd
path = '/Users/Username/Documents/HV_Scale/IA_CWAD/short'
all_files = glob.glob(os.path.join(path, "*.csv"))
names = [os.path.basename(x) for x in glob.glob(path+'\*.csv')]
df = pd.DataFrame()
for file_ in all_files:
file_df = pd.read_csv(file_,sep=';', parse_dates=[0], infer_datetime_format=True,header=None )
file_df['file_name'] = file_
df = df.append(file_df)
#However, this adds the old csv file name and not the renamed one
In order to rename and move these files, all you need is:
import glob
import os
import shutil
import sys
# Copy every '*_IA_*.csv' file from SOURCE to TARGET under a shortened name
# that starts at 'IA_' (e.g. 'sandbox_x_IA_7.csv' -> 'IA_7.csv').
SOURCE = '<Your source directory>'
TARGET = '<Your target directory>'
for file in glob.glob(os.path.join(SOURCE, '*_IA_*.csv')):
    # Search the base name only: a directory such as 'runs_IA_2021' would
    # otherwise match first and leak path components into the new name.
    basename = os.path.basename(file)
    filename = basename[basename.index('_IA_') + 1:]
    target = os.path.join(TARGET, filename)
    if os.path.exists(target):
        # Never overwrite an existing file; report the clash instead.
        print(f'Target file {target} already exists', file=sys.stderr)
    else:
        shutil.copy(file, target)
As there's nothing in the OP's question that tries to handle modification of the CSV files, that is left as an exercise for the OP.
Source and target directories should be different; otherwise this can lead to ambiguous results.

how to sort out files according to excel

I have an Excel file that contains long product tag name like(for now, just working on 3 of them):
4049047000037
4049047000044
4049047118954
and i have a folder on my desktop called "1" containing .jpg files with tag names like:
4049047000037.jpg
4049047000044.jpg
4049047118954.jpg
I want to write code that, if a tag name is in my Excel file, copies the matching .jpg file to another folder.
import os
import pandas as pd
movdir = ["C:\Users\muhammedcan\Desktop\1"]
basedir = "C:\Users\muhammedcan\Desktop\2"
i=0
#to see what i have in my folder
print os.listdir("C:/Users/muhammedcan/Desktop/1/")
df= pd.read_excel("deneme.xls", sheetname= "sayfa4")
df1= df.columns[1]
listSepalWidth = df[df1]
print listSepalWidth
#to make file name and product tag name same
for i in listSepalWidth:
i=str(i)+(".jpg")
print i
Can you help me copy a file into another folder if its tag name exists in my Excel file?
this is my result so far:
['4049047000037.jpg', '4049047000044.jpg', '4049047000068.jpg',
'4049047000075.jpg', '4049047000082.jpg', '4049047000105.jpg',
'4049047118947.jpg', '4049047118954.jpg']
4049047000037.jpg
4049047000044.jpg
4049047118954.jpg
4049047000068.jpg
4049047000075.jpg
4049047000082.jpg
4049047118947.jpg
4049047000105.jpg
I used the following code, and I am receiving an error.
from shutil import copyfile
copyfile("C:\Users\muhammedcan\Desktop\1", "C:\Users\muhammedcan\Desktop\2")
Error is:
C:\Python27\python.exe "C:/Users/muhammedcan/Desktop/summer
courses/programing/MP4/solution/my_work.py"
Traceback (most recent call last):
File "C:/Users/muhammedcan/Desktop/summer courses/programing/MP4/solution/my_work.py", line 3, in <module>
copyfile("C:\Users\muhammedcan\Desktop\1",
Process finished with exit code 1
The following should do what you are looking for:
import os
import glob
import pandas as pd
import shutil
# Copy every .jpg whose tag name appears in the second column of the Excel
# sheet from source_folder to destination_folder.
source_folder = r"C:\Users\muhammedcan\Desktop\1"
destination_folder = r"C:\Users\muhammedcan\Desktop\2"
available_filenames = [os.path.basename(fn) for fn in glob.glob(os.path.join(source_folder, '*.jpg'))]
# `sheet_name` is the current spelling of the keyword formerly called `sheetname`.
df = pd.read_excel("deneme.xls", sheet_name="sayfa4")
for tag_name in df.iloc[:, 1]:
    filename = "{}.jpg".format(tag_name)
    if filename in available_filenames:
        # print() with a single argument behaves the same on Python 2 and 3.
        print("{} - found".format(filename))
        shutil.copyfile(os.path.join(source_folder, filename), os.path.join(destination_folder, filename))
    else:
        print("{} - not found".format(filename))
It first creates a list of .jpg filenames found in the source_folder. It then loads the Excel file into pandas and iterates over the second column. If the tag name is found in the list of available_filenames, the shutil.copyfile() function is used to copy the file from 1 to 2. Note that os.path.join() is used to safely join the parts of a file path together.
To make it into a function to let you also do 'pdf' you could do:
import os
import glob
import pandas as pd
import shutil
source_folder = r"C:\Users\muhammedcan\Desktop\1"
destination_folder = r"C:\Users\muhammedcan\Desktop\2"
df = pd.read_excel("deneme.xls", sheetname="sayfa4")
def copy_files(source_folder, destination_folder, extension, tag_names=None):
    """Copy each tagged ``<tag>.<extension>`` file found in the source folder.

    Prints ``"<file> - found"`` or ``"<file> - not found"`` for every tag.

    Args:
        source_folder: Directory searched for ``*.<extension>`` files.
        destination_folder: Existing directory the matching files are copied to.
        extension: File extension without the leading dot, e.g. ``'jpg'``.
        tag_names: Optional iterable of tag names. When omitted, the second
            column of the module-level ``df`` is used, as before.
    """
    if tag_names is None:
        tag_names = df.iloc[:, 1]
    # A set gives constant-time membership tests inside the loop.
    available_filenames = {
        os.path.basename(fn)
        for fn in glob.glob(os.path.join(source_folder, '*.{}'.format(extension)))
    }
    for tag_name in tag_names:
        filename = "{}.{}".format(tag_name, extension)
        if filename in available_filenames:
            # print() with a single argument behaves the same on Python 2 and 3.
            print("{} - found".format(filename))
            shutil.copyfile(os.path.join(source_folder, filename), os.path.join(destination_folder, filename))
        else:
            print("{} - not found".format(filename))
copy_files(source_folder, destination_folder, 'jpg')
copy_files(source_folder, destination_folder, 'pdf')
This assumes the same deneme.xls is used for both. If not it could be passed as another argument to the function.

Python, open a file when the name is not fully known

I have a list of files with names such as these:
20140911_085234.csv
20140912_040056.csv
What is known is the first part which is the date (the second is a random number). How can I open the correct file if I know the date?
Update: There is one file per day.
As @isedev says, you could use the fnmatch function to find all the files matching the "date" pattern. The code could look like this:
from fnmatch import fnmatch
import os
# Find every file in folder_path whose name starts with the known date and
# write content_file into it.
folder_path = '/home/Desktop/project'
all_files = os.listdir(folder_path)
# Bytes, because the file below is opened in binary mode; writing a text
# string to a 'wb' handle raises TypeError on Python 3.
content_file = b'Hello World'
_date = '20140911'
_pattern = _date + '*'
for file_name in all_files:
    if fnmatch(file_name, _pattern):
        # NOTE: 'wb' replaces whatever the matched file contained.
        with open(os.path.join(folder_path, file_name), 'wb') as f:
            f.write(content_file)
I hope it helps you!
Using glob :
import time
import glob
import os
def open_file_by_date(date, path="/path/to/file"):
    """Read every ``<date>_*.csv`` file found in *path*.

    Args:
        date: Date prefix of the file name, e.g. ``'20140911'``.
        path: Directory to search. Defaults to the placeholder directory
            used by the original snippet.

    Returns:
        A dict mapping each matching file name to its raw bytes contents;
        empty when no file matches.
    """
    contents = {}
    # glob.glob is the documented API; glob.glob1 is an undocumented helper.
    for full_path in sorted(glob.glob(os.path.join(path, date + "_*.csv"))):
        # 'rb', not 'wb': write mode would truncate the file we want to read.
        with open(full_path, "rb") as f:
            contents[os.path.basename(full_path)] = f.read()
    return contents


if __name__ == "__main__":
    today = time.strftime("%Y%m%d")
    open_file_by_date(today)

Categories

Resources