How to rename dataframe with file name in python? - python

I have several csv files in a folder that I need to read and do the same thing to each file. I want to name each dataframe that is created after its file name, but am not sure how. Could I store the file names in a list and then refer to them later somehow...? My current code is below. Thank you in advance.
# Reads every entry of the folder with pandas. Each iteration rebinds RawData,
# so only the dataframe of the last file is still referenced after the loop.
import os
# NOTE(review): in Python 3 a non-raw "C:\Users..." literal is a syntax error
# because \U starts a unicode escape -- a raw string r"C:\Users\DATA" avoids it.
Path = "C:\Users\DATA"
filelist = os.listdir(Path)
for x in filelist:
# NOTE(review): pd is not imported in this snippet (presumably pandas).
RawData = pd.read_csv("C:\Users\DATA\%s" % x)

What if you keep all the files in a single dictionary of dataframes, keyed by file name?
import os

import pandas as pd

# Raw string: in a normal literal "\U" starts a unicode escape, which is a
# syntax error in Python 3.
path = r"C:\Users\DATA"

# One dataframe per file, keyed by file name (e.g. raw_data["a.csv"]).
# os.listdir() returns bare names, so each one is joined onto `path`;
# os.path.abspath(name) would resolve it against the current working
# directory instead of the data folder.
raw_data = {
    name: pd.read_csv(os.path.join(path, name))
    for name in os.listdir(path)
}

Related

How to convert any .csv file to an excel file using Python?

I'm trying to convert any .csv file from this path to an excel file. The code works but I need to rename that .csv file manually. Is there a way to read and convert whatever .csv file without renaming it? Your input is highly appreciated. Thanks a lot!
# Converts one hard-coded CSV file to an Excel workbook.
import pandas as pd
# Both the input and the output file names are fixed here, which is why the
# source file currently has to be renamed by hand before each run.
read_file = pd.read_csv(r'C:/Processed_Report/Source1.csv')
read_file.to_excel (r'C:/Processed_Report/Source.xlsx', index = None, header=True)
I'm not sure I understood, but this is how you can read all the files inside a folder and convert them to Excel files.
import os

import pandas as pd

# Walk the report folder (including sub-folders) and write an .xlsx file next
# to every .csv file that is found.
for root, dirs, files in os.walk("C:/Processed_Report/", topdown=False):
    for name in files:
        base_name, ext = os.path.splitext(name)  # Split name, extension
        # Compare the extension exactly. `ext in ".cvs"` was a substring test
        # against a misspelled suffix: it never matched ".csv", but it did
        # match files with no extension at all (ext == "").
        if ext.lower() == ".csv":
            df = pd.read_csv(os.path.join(root, name))
            df.to_excel(os.path.join(root, f'{base_name}.xlsx'))
Using pathlib
from pathlib import Path

import pandas as pd

file_path = r"C:/Processed_Report"

# "*.csv" (with the dot) selects files whose extension is .csv; the pattern
# "*csv" would also pick up any name that merely ends in the letters "csv".
files = list(Path(file_path).glob("*.csv"))

# Write one Excel file per CSV, reusing the CSV's stem as the workbook name.
# A plain loop is used because the work is a side effect, not a list to keep.
for csv_file in files:
    pd.read_csv(csv_file).to_excel(f"{file_path}/{csv_file.stem}.xlsx", index=False)

Read multiple files present in 2 folders

I have 7 vcf files present in 2 directories:
dir
I want to concatenate all files present on both folders and then read them through python.
I am trying this code:
# Import Modules
import os
import pandas as pd
import vcf
# Folder Path
path1 = "C://Users//USER//Desktop//Anas/VCFs_1/"
path2 = "C://Users//USER//Desktop//Anas/VCFs_2/"
#os.chdir(path1)
# Builds a dataframe from the records yielded by vcf.Reader and merges in the
# columns obtained from each record's INFO value (joined on the row index).
# NOTE(review): f2 is passed to open() as its second positional argument,
# i.e. as the file mode, not as a second file to read.
def read(f1,f2):
reader = vcf.Reader(open(f1,f2))
df = pd.DataFrame([vars(r) for r in reader])
out = df.merge(pd.DataFrame(df.INFO.tolist()),
left_index=True, right_index=True)
return out
# Read text File
# NOTE(review): both handles are bound to the same name f and neither is used;
# read() is called with the module-level folder paths path1/path2 rather than
# with the file_path1/file_path2 parameters.
def read_text_file(file_path1,file_path2):
with open(file_path1, 'r') as f:
with open(file_path2,'r') as f:
print(read(path1,path2))
# iterate through all file
# NOTE(review): os.listdir() without an argument lists the current working
# directory, not path1 or path2.
for file in os.listdir():
# Check whether file is in text format or not
if file.endswith(".vcf"):
file_path1 = f"{path1}\{file}"
file_path2 = f"{path2}\{file}"
print(file_path1,"\n\n",file_path2)
# call read text file function
#data = read_text_file(path1,path2)
# NOTE(review): this passes the two folder paths, so open() is asked to open
# directories -- presumably the source of the permission error; confirm.
print(read_text_file(path1,path2))
But it's giving me a permission error. I know that we get this error when we try to read folders instead of files. But how can I read the files present in the folders? Any suggestions?
You may need to run your Python code with Administrator privileges, if you are trying to access another user's files.

Python to rename files in a folder with matching name in CSV file

I am trying to write a script which will rename a block of asp files to their correct name.
Currently I have a folder which has asp files in it which are just named as id1.asp, id2.asp and so on.
I have a CSV file which has the ids in it and a description for that ID.
id1, pen
id2, rubber
id3, paper
etc.
I am trying to work out how to rename the id1.asp to be pen.asp, id2.asp to rubber.asp and so on.
Thank you. This is what I have tried so far:
# Attempt so far: print the CSV rows, then turn the first parsed row into a
# plain dict. No file is renamed yet.
import csv
import os
import shutil
# NOTE(review): the file is opened without a with-block and is never closed.
a_csv_file = open("skuconvert.csv", "r")
dict_reader = csv.DictReader(a_csv_file)
# NOTE(review): this loop iterates the file object itself rather than
# dict_reader, so it consumes the lines the reader below would need -- confirm.
for row in a_csv_file:
print (row)
# Only element [0], i.e. a single row, is kept from the reader.
ordered_dict_from_csv = list(dict_reader)[0]
dict_from_csv = dict(ordered_dict_from_csv)
print(dict_from_csv)
# dirs and head_tail are computed but not used anywhere in this snippet.
dirs = os.listdir('./')
path = ''
head_tail = os.path.split(path)
You can use glob.glob() to find all of your ASP files in a folder. First load your CSV file in as a dictionary (this assumes there are two columns, the IDs and the names). If there are more columns or if there is a header this would need to be dealt with differently.
import glob
import csv
import os
import shutil

# Build the ID -> new base name mapping from the first two CSV columns.
# - newline="" is how the csv module expects its input file to be opened.
# - skipinitialspace=True drops the blank after the comma in rows such as
#   "id1, pen", so the new file name does not start with a space.
# - Blank lines and rows with fewer than two columns are skipped instead of
#   making dict() fail; any columns after the second are ignored.
with open("skuconvert.csv", newline="") as f_input:
    csv_input = csv.reader(f_input, skipinitialspace=True)
    ids = {row[0]: row[1] for row in csv_input if len(row) >= 2}

asp_files = glob.glob(r"f:\dropbox\python temp\*.asp")

for asp_file in asp_files:
    path, basename = os.path.split(asp_file)
    filename, ext = os.path.splitext(basename)

    if filename in ids:
        # Keep the file in its folder and keep its extension; only the base
        # name is replaced by the description from the CSV.
        new_name = os.path.join(path, f'{ids[filename]}{ext}')
        print(f"Renaming: '{asp_file}' to '{new_name}'")
        try:
            shutil.move(asp_file, new_name)
        except OSError:
            # Only filesystem errors are reported and skipped; a bare except
            # would also swallow KeyboardInterrupt and programming errors.
            print(f"Unable to rename: {new_name}")
    else:
        print(f"ID unknown: {filename}")
Then for each ASP file that is found, split out the path and extension and determine if the ID is found in the CSV file. If it is, build the new filename and call shutil.move() to rename the file. If it is not, then print the unknown file.
You can use a csv reader to read the filenames and create a dictionary mapping each id to its new file name. Then you can use os.listdir() with os.path.splitext() and shutil.move() to rename your files.

How to iterate through file names and print out the corresponding file names with pathlib.glob()

My Directory looks like this:
P1_SAMPLE.csv
P5_SAMPLE.csv
P7_SAMPLE.csv
P10_SAMPLE.csv
How do I iterate through files in this directory using pathlib.glob() if I want to print out the corresponding file names?
My code looks like this at the moment:
# Iterates over the entries of file_path whose names match P*_SAMPLE.csv.
# The loop body is the part being asked about; as written it prints an empty
# line per matching file.
from pathlib import Path
file_path = r'C:\Users\HP\Desktop\My Directory'
for fle in Path(file_path).glob('P*_SAMPLE.csv'):
print() # What should the code here be?
I want my output to print this out:
P1_sam
P5_sam
P7_sam
P10_sam
Many thanks in advance!
from pathlib import Path

file_path = r'C:\Users\HP\Desktop\My Directory'

# For every file matching P*_SAMPLE.csv print "<prefix>_<abc>", where <prefix>
# is the text before the first underscore and <abc> is the first three
# characters of the following underscore-separated piece, lower-cased.
for fle in Path(file_path).glob('P*_SAMPLE.csv'):
    prefix, tail = fle.name.split('_')[:2]
    short_tail = tail[:3].lower()
    print("{}_{}".format(prefix, short_tail))
Output :
P10_sam
P1_sam
P4_sam
P5_sam

Changing the name of multiple files based on an index

I currently have several hundred pdf files with file names that I would like to change.
The current names of the files don't really follow a pattern, however I have a Excel file with what the current file name is and what the new file name is that I want for a specific file. This looks similar to this:
I am looking for a way in python to rename all of my files (about 500) according to my excel index.
What I tried:
# Attempt so far: renames every entry of the folder by putting '00' in front
# of its current name and appending '.pdf' to it. The Excel index of old/new
# names is not read anywhere in this snippet.
import os
path = 'C:\\Users\\Desktop\\Project\\'
files = os.listdir(path)
for file in files:
os.rename(os.path.join(path, file), os.path.join(path, '00' + file + '.pdf'))
Thanks.
If you can save the excel file as csv, this should work
import os
import csv

path = 'C:\\Users\\Desktop\\Project\\'

# Rename files inside `path` according to a two-column CSV:
# column 0 holds the current file name, column 1 the new one.
with open('my_csv.csv') as f:
    rows = csv.reader(f)
    next(rows)  # Skip first row
    for row in rows:
        old_name, new_name = row[0], row[1]
        os.rename(os.path.join(path, old_name), os.path.join(path, new_name))
You are really close!
You need to iterate over the names in your xlsx file. One simple way is to load the data using pandas.read_excel and then iterate over the source and dest columns, renaming each file.
You can use os.path.join to create the full path from a given folder and a given file.
Here the code:
# Modules
import os            # renaming files
import pandas as pd  # reading the Excel index

# Folder holding the files, and the workbook listing old and new names
path_folder = r'C:\Users\Desktop\Project'
path_csv = r'C:\Users\Desktop\Project\csv_file.xlsx'

# Load the index and show it
df = pd.read_excel(path_csv)
print(df)
#   Current file name Desired file name
# 0             a.pdf           001.pdf
# 1             b.pdf           002.pdf
# 2             c.pdf           003.pdf

# Walk the two name columns row by row as plain (old, new) tuples
rename_pairs = df[["Current file name", "Desired file name"]]
for old_name, new_name in rename_pairs.itertuples(index=False, name=None):
    # Both names are relative to the same folder
    source_file = os.path.join(path_folder, old_name)
    dest_file = os.path.join(path_folder, new_name)
    # Give the file its desired name
    os.rename(source_file, dest_file)
Excel file used:
Hope that helps !
Provided you have table with names, you can use the following code:
import os

# Table of "<current name> <new name>" pairs, one pair per line.
names = '''a.pdf 001.pdf
b.pdf 002.pdf
c.pdf 003.pdf'''

# Work inside the folder so the bare file names resolve there.
os.chdir(r'C:\Users\Desktop\Project')

# Each line splits on whitespace into exactly two names: old and new.
for old, new in (entry.split() for entry in names.splitlines()):
    os.rename(old, new)
You can copy table from Excel to this piece of code
If you don't care about table, you can try
import os
from itertools import count

# Rename every .pdf in the project folder to 001.pdf, 002.pdf, ...
numbers = count(1)
os.chdir(r'C:\Users\Desktop\Project')
# os.listdir() returns names in arbitrary order; sorting makes the numbering
# the same on every run.
for old in sorted(os.listdir('.')):
    if not old.endswith('.pdf'):
        continue
    new = '%03d.pdf' % next(numbers)
    # os.rename() can silently replace an existing destination (it does on
    # POSIX), so a folder that already contains e.g. 001.pdf would lose that
    # file. Stop instead of overwriting; a file already carrying its target
    # name is left as it is.
    if new != old and os.path.exists(new):
        raise FileExistsError('refusing to overwrite existing file: %s' % new)
    os.rename(old, new)

Categories

Resources