File naming when working with paths - python

So I have the following code and I'm trying to export a csv and immediately open it in Python.
# define weekly pull code
def GT_Weekly_Run(keys):
    """Pull a weekly Google Trends report for `keys` and load it into a DataFrame.

    keys: a plain string of search terms; it becomes part of the saved file
    name, so pass a simple string (not a list/tuple).
    Relies on module-level `path`, `google_username`, `google_password`.
    Returns a two-column DataFrame (Date, Trend).
    """
    # connect to Google
    connector = pyGTrends(google_username, google_password)
    # make request
    connector.request_report(keys, geo="US")
    # wait a random amount of time between requests to avoid bot detection
    time.sleep(randint(5, 10))
    # Build the name ONCE and reuse it for saving and re-opening.
    # BUG FIX: the original `name = path, '_' + ...` built a *tuple* (note
    # the comma), causing "TypeError: can only concatenate tuple".
    trend_name = '_' + "GT_Weekly" + '_' + keys
    # download file -- save_csv appends ".csv" to the name it is given,
    # so append it here too when reading back (confirm against your
    # pyGTrends version).
    connector.save_csv(path, trend_name)
    with open(path + trend_name + '.csv', 'rt') as csvfile:
        csvReader = csv.reader(csvfile)
        # keep only data rows whose first field looks like a 20xx date
        data = [row for row in csvReader if row and row[0].startswith("20")]
    week_df = pd.DataFrame(data)
    # BUG FIX: assign the list directly; wrapping it ([cols]) creates a MultiIndex
    week_df.columns = ["Date", "Trend"]
    return week_df
The problem is that I'm not able to match the save as file name with the open file name. Have tried a number of things but keep getting errors regarding
IOError: [Errno 2] No such file or directory: 'GT_Weekly_football.csv'
TypeError: can only concatenate tuple (not "str") to tuple
Is there anything that looks off. I just need to go from saving the file as X and using that same name (X) to import it back in.
Thanks!

I would recommend you create a variable to hold the filename. That way, the same name will be used both for creation and loading back.
import os
# define weekly pull code
def GT_Weekly_Run(keys):
    """Pull a weekly Google Trends report for `keys` and load it into a DataFrame.

    keys should be a simple string; it becomes part of the saved file name.
    Relies on module-level `path`, `google_username`, `google_password`.
    Returns a two-column DataFrame (Date, Trend).
    """
    # connect to Google
    connector = pyGTrends(google_username, google_password)
    # make request
    connector.request_report(keys, geo="US")
    # wait a random amount of time between requests to avoid bot detection
    time.sleep(randint(5, 10))
    # download file -- ONE variable holds the name used for saving AND loading
    filename = "_GT_Weekly_" + keys
    connector.save_csv(path, filename)
    # BUG FIX: save_csv appends ".csv" to the name it is given (the reported
    # IOError names 'GT_Weekly_football.csv'), so append it here as well --
    # otherwise open() raises IOError: [Errno 2] No such file or directory.
    with open(os.path.join(path, filename + '.csv'), 'rt') as csvfile:
        csvReader = csv.reader(csvfile)
        # keep only data rows whose first field looks like a 20xx date
        data = [row for row in csvReader if row and row[0].startswith("20")]
    week_df = pd.DataFrame(data)
    cols = ["Date", "Trend"]
    # BUG FIX: assign the list directly; [cols] would create a MultiIndex
    week_df.columns = cols
    return week_df
It is safer to make use of Python's os.path.join function to create your full file names.
Also take a look at the keys parameter you are passing to GT_Weekly_Run, it should just be a simple string.

Related

Create output file based on changing input value

I have the following txt files (10000s) in multiple directories eg.
BaseDirectory\04_April\2019-04-14\UniqeDirectoryName1 (username)\345308457384745637.txt
BaseDirectory\04_April\2019-04-14\UniqeDirectoryName2 (username)\657453456456546543.txt
BaseDirectory\04_April\2019-04-14\UniqeDirectoryName3 (username)\234545743564356774.txt
BaseDirectory\05_May\2019-05-14\UniqeDirectoryName1 (username)\266434564564563565.txt
BaseDirectory\05_May\2019-05-14\UniqeDirectoryName2 (username)\934573845739632048.txt
BaseDirectory\05_May\2019-05-14\UniqeDirectoryName3 (username)\634534534535654501.txt
so in other words in each date folder there are multiple directories that again contains text files.
import os
import re
import csv
# Walk every file below the base directory and break its full path into the
# pieces needed later (month folder, date folder, user folder, file name).
for path, subdirs, files in os.walk("E:\\BaseDir\\"):
    for name in files:
        file_fullinfo = os.path.join(path, name)
        path, filename = os.path.split(file_fullinfo)
        # Full path with the extension removed
        file_noext = str(os.path.splitext(file_fullinfo)[0])
        # Split on backslashes: [drive, base, month, date, user-dir, file]
        file_splitinfo = re.split('\\\\', file_noext, 0)
        file_month = file_splitinfo[2]
        file_date = file_splitinfo[3]
        # Strip the trailing "(username)" from the directory name
        file_folder = re.sub(r'\([^)]*\)', '', file_splitinfo[4])
        file_name = file_splitinfo[5]
        file_category = file_folder
My script generates the following..
['E:', 'BaseDirectory', '04_April', '2019-04-09', 'UniqeDirectoryName', '345308457384745637.txt', 'UniqeDirectoryName']
So far so good, writing this to a generic CSV file is also straight forward, but I want to create a new CSV file based on the changing date like this.
E:\BaseDir\2019-04-09.csv
file_folder, file_name, file_category
'UniqeDirectoryName', '543968732948754398','UniqeDirectoryName'
'UniqeDirectoryName', '345308457384745637','UniqeDirectoryName'
'UniqeDirectoryName', '324089734983987439','UniqeDirectoryName'
E:\BaseDir\2019-05-14.csv
file_folder, file_name, file_category
'UniqeDirectoryName', '543968732948754398','UniqeDirectoryName'
'UniqeDirectoryName', '345308457384745637','UniqeDirectoryName'
'UniqeDirectoryName', '324089734983987439','UniqeDirectoryName'
How can I accomplish this? I can't quite wrap my head around it — the struggle of being a Python noob is real.. :)
If you can live without the first line as a header row it can be achieved quite simply.
# One output file per date; append mode creates the file on first use and
# keeps adding rows afterwards.
output_file_path = 'D:/output_files/' + file_date + '.csv'
with open(file=output_file_path, mode='a') as csv_file:
    csv_file.write("my data\n")
if you absolutely must have the header then you can test if the file exists first, if it doesn't exist write the header row
import os.path
# One output file per date, with a header row written exactly once.
output_file_path = 'D:/output_files/' + file_date + '.csv'
header_needed = not os.path.exists(output_file_path)
if header_needed:
    # First time we see this date: start the file with the header row
    with open(file=output_file_path, mode='a') as csv_file:
        csv_file.write("my header row\n")
# Then append the data row(s)
with open(file=output_file_path, mode='a') as csv_file:
    csv_file.write("my data\n")

Need help renaming files referring to a CSV whilst dealing with duplicate names etc

I am trying to create a program to rename a large quantity of files according to a CSV provided by a client. The CSV contains in:
Row [0] - The current name to be replaced
Row [1] - The name to replace it with
I have encountered issues with duplicate file names within the CSV in the past, in this scenario I would like to automatically add "_n" (n = duplicate number) to the file names when appropriate.
The files and the CSV are all located in the same folder.
import os
import sys
import csv
print('Hey')
def rename_files(csv_filename):
    """Rename .wav files in the current directory according to a CSV mapping.

    Each CSV row is [current_name, new_name] (names without the .wav
    extension). Raises Exception when two rows map onto the same new name,
    so no file can be silently overwritten. Files listed in the CSV but
    missing on disk are reported and skipped.
    """
    # BUG FIX: read the CSV named by the parameter instead of the
    # hard-coded 'rename_csv.csv'.
    with open(csv_filename, 'r') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',', quotechar='"')
        files_mapping = {}
        new_filenames = set()
        for row in csvreader:
            name = row[0] + '.wav'
            new = row[1] + '.wav'
            if new in new_filenames:
                raise Exception(
                    'Found duplicate filename {} for file {}'.format(new, name)
                )
            new_filenames.add(new)
            # BUG FIX: key on the variable `name`, not the literal string
            # 'name' -- the literal kept only the last row, which is why the
            # script printed "name does not exist" and renamed nothing.
            files_mapping[name] = new
    for name, new in files_mapping.items():
        if os.path.exists(name):
            print('\n' + 'renaming ' + name)
            os.rename(name, new)
        else:
            print('\n' + name + " does not exist")
if __name__ == "__main__":
    rename_files(sys.argv[1])
expected result is that the files labelled (in this case...) 1.wav, 2.wav, 3.wav, etc. will be renamed to a.wav, b.wav, c.wav, etc. as per the CSV. Instead it seems to run but not change any file names.
Terminal output:
Hey
name does not exist
You are incorrectly building the files mapping dictionary. I think on this line
files_mapping['name'] = new
you meant to refer to the name variable - so replace it with
files_mapping[name] = new

Using CSV Sniffer to determine a delimiter, but it doesn't work on multiple files

I'm using the Sniffer class in CSV Reader to determine what a delimiter is in a CSV file and it works on single files but if I add in a loop and point it to a folder with the same CSV in, it throws out this error:
File "delimiter.py", line 17, in read_csv_delimit
reader = csv.reader(csvfile, dialect)
TypeError: "delimiter" must be a 1-character string
The script looks like this:
#!/usr/local/bin/python3
import csv
import os
def read_csv_delimit(file_dir, csv_file):
    """Return the first field of every row of `csv_file` inside `file_dir`.

    The delimiter is auto-detected with csv.Sniffer from the first 1 KB, so
    the file must really be delimiter-separated text; non-CSV input makes
    the sniff/parse step fail.
    """
    # Initialise list
    file_csv = []
    # Open csv & check delimiter.
    # Build the path portably instead of concatenating with "/".
    with open(os.path.join(file_dir, csv_file), newline='', encoding="ISO-8859-1") as csvfile:
        dialect = csv.Sniffer().sniff(csvfile.read(1024))
        csvfile.seek(0)  # rewind after sniffing so the reader sees row 1
        reader = csv.reader(csvfile, dialect)
        for item in reader:
            file_csv.append(item[0])
        #del file_csv[0]
    return file_csv
def split_path(full_path):
    """Split `full_path` into (directory, filename) via os.path.split."""
    directory, filename = os.path.split(full_path)
    return (directory, filename)
# Ask the user for the folder that holds the machine CSV exports
machine_dir = input("Drop the folder here: ")
# Parse every entry in that folder; note that non-CSV entries can make
# delimiter sniffing inside read_csv_delimit fail
machines = os.listdir(machine_dir)
for machine in machines:
    print(machine)
    machine_list = read_csv_delimit(machine_dir, machine)
    print(machine_list)
Given the trace, it seems that your script does pick non-CSV files, indeed. You can use the glob module for fine-tuning the search pattern to pick up only the files you want, but even a simple extension lookup should suffice:
target = input("Drop the folder here: ")
# Only parse entries ending in ".csv" so stray files don't break the Sniffer
machine_list = [
    read_csv_delimit(target, entry)
    for entry in os.listdir(target)
    if entry[-4:] == ".csv"
]
print(machine_list)
Checking for the entered directory validity, tho, is highly recommended, even if it's performed with the simplest os.path.isdir(target).
I'd also recommend you to use os.path facilities to build up your path in the read_csv_delimit() function, e.g.:
with open(os.path.join(file_dir, csv_file), newline='', encoding = "ISO-8859-1") as csvfile:

How to Read CSV, Create a QR Code, and Write its Filename to New Column?

I'm writing a Python script to generate a QR code from the first column in a csv (concatenated with a local name), and that part works well. The csv just has three columns and looks like this:
ID First Last
144 Jerry Seinfeld
491 George Costanza
104 Elaine Benes
99 Cosmo Kramer
And I use my Python script to take that file, append a prefix to the IDs (in this case, 'NBC') and then create QR codes for each record in a new folder. It's a little long but all of this seems to work fine also:
# --- Python 2 script (note the `print labeldata` statement below).
# Creates per-"local" output folders, archives a copy of the input CSV, and
# writes one QR-code PNG per data row of the input CSV.
import csv
import qrcode
import os
import shutil
import time
import inquirer
#Identify Timestamp
timestr = time.strftime("%Y%m%d-%H%M%S")
local = 'NBC'
#Load csv
filename = "stackoverflowtest.csv"
#Path to new local folder
localfolder = local
localimagefolder = localfolder+'/image'
localfilefolder = localfolder+'/files'
#Check/create folders based on local
if not os.path.exists(localfolder):
os.makedirs(localfolder)
if not os.path.exists(localimagefolder):
os.makedirs(localimagefolder)
if not os.path.exists(localfilefolder):
os.makedirs(localfilefolder)
#Copy uploaded file to their local's file folder
shutil.copy2(filename, localfilefolder+'/'+local+'-'+timestr+'.csv') # complete target filename given
#Read csv and generate QR code for local+first column of csv
with open(filename, 'rU') as csvfile:
# skipping via the raw file handle, before the csv.reader is built
next(csvfile, None) #skip header row
reader = csv.reader(csvfile, delimiter=',', dialect=csv.excel_tab)
for i, row in enumerate(reader):
labeldata = row[0] #Choose first column of data to create QR codes
print labeldata
qr = qrcode.QRCode(
version=1,
error_correction=qrcode.constants.ERROR_CORRECT_L,
box_size=10,
border=4,
)
qr.add_data(local+"-"+labeldata)
qr.make()
img = qr.make_image()
# NOTE(review): .format(i) applies only to the ".png" literal, which has no
# placeholders, so it is a no-op -- presumably left over from an earlier
# "{}.png".format(i) naming scheme; confirm before relying on it.
img.save(localimagefolder+"/"+local+"-"+labeldata+".png".format(i)) #Save image
It creates the NBC folder, copies each csv file in one subfolder, and creates the QR codes for each ID (NBC-144,NBC-491,NBC-104,NBC-99) in another.
The part where I'm running into a problem is opening the csv and writing the filepath/filename back to the csv (or a copy of the csv since from what I've read, I likely can't do it to the same one). Is that possible?
The closest I've come with a script that works is appending the local name with the ID and writing that back to a column but I can't seem to figure out how to do the same with a variable, let alone a filepath/filename:
import csv
import os
import sys

# Copy the input CSV to a second file, appending a "<local>-<ID>" column
# (built from the row's first field) to every row.
filename = 'stackoverflowtest.csv'
newfilename = 'stackoverflowtest2.csv'
local = 'NBC'

with open(filename, 'rU') as f:
    reader = csv.reader(f)
    with open(newfilename, 'w') as g:
        writer = csv.writer(g)
        for row in reader:
            extra_column = '-'.join([local, row[0]])
            writer.writerow(row[0:] + [extra_column])
Is it possible to write something like that within my existing script to add a column for the filepath and filename? Everything I try breaks -- especially if I attempt to do it in the same script.
EDIT:
This is my closest attempt that overwrote the existing file
# NOTE(review): 'r+' opens the existing CSV for read/write positioned at the
# START of the file, so these writes clobber its current contents in place --
# matching the reported "overwrote the existing file".
f=open(newfilename,'r+')
w=csv.writer(f)
# NOTE(review): `path` is read before being assigned in this snippet
# (os.walk(path) vs. the loop variable `path`) -- confirm which starting
# directory was intended.
for path, dirs, files in os.walk(path):
for filename in files:
# Writes the output CSV's own name once per file walked, not the walked
# file's path/name -- presumably `filename` was intended here.
w.writerow([newfilename])
Also it's still in a separate script.
Since I can't run the code in your question directly, I had to commented-out portions of it in what's below for testing, but think it does everything you wanted in one loop in one script.
# --- Python 2 answer script (print statement; csv files opened in 'rb'/'wb').
# Single pass over the input CSV: for each row it would generate the QR image
# (those calls are commented out for testing) and writes the original row plus
# two new columns (label, image path) to a timestamped output CSV.
import csv
#import qrcode
import os
import shutil
import time
#import inquirer
# Identify Timestamp
timestr = time.strftime("%Y%m%d-%H%M%S")
local = 'NBC'
# Load csv
filename = "stackoverflowtest.csv"
# Path to new local folder
localfolder = local
localimagefolder = os.path.join(localfolder, 'image')
localfilefolder = os.path.join(localfolder, 'files')
# Check/create folders based on local
if not os.path.exists(localfolder):
os.makedirs(localfolder)
if not os.path.exists(localimagefolder):
os.makedirs(localimagefolder)
if not os.path.exists(localfilefolder):
os.makedirs(localfilefolder)
# Copy uploaded file to their local's file folder
target = os.path.join(localfilefolder, local+'-'+timestr+'.csv') # Target filename
#shutil.copy2(filename, target) # Don't need to do this.
# Read csv and generate QR code for local+first column of csv
with open(filename, 'rb') as csvfile, open(target, 'wb') as outfile:
reader = csv.reader(csvfile, delimiter=',', dialect=csv.excel_tab)
writer = csv.writer(outfile, delimiter=',', dialect=csv.excel_tab)
next(reader) # Skip header row.
for row in reader:
# Assumes exactly three columns per row (ID, First, Last); a short or
# long row would raise ValueError here. `id` shadows the builtin.
id, first, last = row
# qr = qrcode.QRCode(
# version=1,
# error_correction=qrcode.constants.ERROR_CORRECT_L,
# box_size=10,
# border=4,
# )
#
# qr.add_data(local+"-"+id)
# qr.make()
#
# img = qr.make_image()
imagepath = os.path.join(localimagefolder, local+"-"+id+".png")
# img.save(imagepath) # Save image.
print "saving img:", imagepath
writer.writerow(row + [local+'-'+id, imagepath])
Output from sample input data:
144,Jerry,Seinfeld,NBC-144,NBC/image/NBC-144.png
491,George,Costanza,NBC-491,NBC/image/NBC-491.png
104,Elaine,Benes,NBC-104,NBC/image/NBC-104.png
99,Cosmo,Kramer,NBC-99,NBC/image/NBC-99.png

Pyshp appends new fields but not record values to an existing shapefile

The problem consists in append columns presented in a .csv file as new fields to an existing shapefile. So, I've used Python and the modules pyshp and csv to, first, copy the content of the original shapefile (geometries and records) and, second, create new fields in this copy and iterate in the respective .csv rows in order to insert on it:
# Copies an existing shapefile with pyshp, then tries to append the columns of
# "<name>_classified.csv" as new fields/records on the copy.
import os, sys
import shapefile, csv
from os.path import basename
filename_full = sys.argv[1]
output_full = sys.argv[2]
name, file_extension = os.path.splitext(filename_full)
output_name, file_extension = os.path.splitext(output_full)
filename_dbf = name + ".dbf"
filename_classified = name + "_classified.csv"
output_dbf = output_name + ".dbf"
# reader
myshp = open(filename_full, "rb")
mydbf = open(filename_dbf, "rb")
r = shapefile.Reader(shp=myshp, dbf=mydbf)
# writer
w = shapefile.Writer(r.shapeType)
# copy shapefiles content
w._shapes.extend(r.shapes())
w.records.extend(r.records())
w.fields = list(r.fields)
w.save(output_full)
# add new records from the csv
with open(filename_classified, 'rt', encoding='utf-8') as csvfile:
reader = csv.DictReader(csvfile, delimiter=',')
headers = reader.fieldnames
# Register each CSV column as a new field on the writer (comprehension used
# for its side effect only; the built list is discarded).
[w.field(field) for field in headers]
for row in reader:
# NOTE(review): after the new fields are registered, the writer's field
# count is (original fields + CSV columns), but this call supplies only the
# CSV values -- pyshp's record() then indexes past the tuple, which matches
# the reported "IndexError: tuple index out of range".
w.record(*tuple([row[f] for f in headers])) # <-- insertion in specific fields
w.save(output_full)
In the pyshp page, there are a couple of examples. One of them is specific to insertion of rows to a specific field. As follows:
>>> w = shapefile.Writer()
>>> w.field('FIRST_FLD','C','40')
>>> w.field('SECOND_FLD','C','40')
>>> w.record('First', 'Line')
>>> w.record(FIRST_FLD='First', SECOND_FLD='Line')
but, even indicating the fields, I get:
Traceback (most recent call last):
File "assigning-shapefile.py", line 68, in <module>
w.record(*tuple([row[f] for f in headers]))
File "/usr/local/lib/python3.5/dist-packages/shapefile.py", line 1040, in record
record = [recordList[i] for i in range(fieldCount)]
File "/usr/local/lib/python3.5/dist-packages/shapefile.py", line 1040, in <listcomp>
record = [recordList[i] for i in range(fieldCount)]
IndexError: tuple index out of range
and, if we look inside the shapefile, we have something like this:
QGIS attribute table before and after the code execution
which I concluded that the fields are successful added, but the rows (w.record with the fields name specified) are not.
Solved the problem using a quite simple approach with osgeo library:
# --
# USAGE:
# python3 assinging-shapefile.py [input-shapefile] [output-shapefile]
# --
# REQUISITE:
# The classification csv file should be edited as a header of classifiers and its labels. The file name is mandatory to be IMAGE_NAME-classified.csv
# Ex:
# Filename: IMAGE_NAME-classified.csv
# Content:
# Random forest, Multilayer-Perc, CRF, SVM
# vegetation, vegetation, building, vegetation
# wall, window, window, window
# ...
# --
# Workaround script: copy the shapefile with pyshp, then use osgeo/ogr to
# create one string field per CSV column and fill them feature by feature.
import os, sys
import shapefile, csv
from os.path import basename
from osgeo import ogr
filename_full = sys.argv[1]
output_full = sys.argv[2]
name, file_extension = os.path.splitext(filename_full)
output_name, file_extension = os.path.splitext(output_full)
filename_dbf = name + ".dbf"
filename_classified = name + "_classified.csv"
output_dbf = output_name + ".dbf"
myshp = open(filename_full, "rb")
mydbf = open(filename_dbf, "rb")
r = shapefile.Reader(shp=myshp, dbf=mydbf)
w = shapefile.Writer(r.shapeType)
# copy shapefile
w._shapes.extend(r.shapes())
w.records.extend(r.records())
w.fields = list(r.fields)
w.save(output_full)
# read the csv records
csvRecords = []
csvHeaders = []
with open(filename_classified, 'rt', encoding='utf-8') as csvfile:
reader = csv.DictReader(csvfile, delimiter=',')
csvHeaders = reader.fieldnames
for line in reader:
csvRecords.append(line)
# Open the copied shapefile for update (second argument 1 = writable) and
# add one 16-character string field per CSV column.
driver = ogr.GetDriverByName('ESRI Shapefile')
infile = driver.Open(output_full, 1)
for classifier in csvHeaders:
field = ogr.FieldDefn(classifier, ogr.OFTString)
field.SetWidth(16)
layer = infile.GetLayer()
layer.CreateField(field)
cont = 0
# NOTE(review): `cont` advances whenever the feature's first attribute does
# not equal the counter -- presumably aligning CSV rows with features by
# position, but the increment happens inside the per-column loop; verify the
# intended row/feature pairing before reuse.
for feature in layer:
for classifier in csvHeaders:
if(feature.GetField(0)!=cont):
cont += 1
feature.SetField(classifier, csvRecords[cont][classifier])
layer.SetFeature(feature)
# Releasing the handle flushes edits to disk in ogr
infile=None
which is able (i) to read the csv file (with the columns to be added), (ii) read the shapefile and copy it, (iii) modify the .shp copy by editing each row with a correspondent csv record.

Categories

Resources