avoid opening an unnecessary tkinter window - python

I'm trying to save a csv file as a .json file. But when the following code runs, it always opens a small empty tkinter window along with the file-open dialog. How can this be avoided?
import csv
import json
import pandas as pd
from tkinter import filedialog
# asksaveasfile is used to save a file with any extension
from tkinter.filedialog import asksaveasfile

class Manipulate_data:
    def convert_csv_to_json(self):
        csv_file_path = filedialog.askopenfilename(initialdir = '/Desktop',
                                                   title = 'Select a CSV file',
                                                   filetypes = (('csv file', '*.csv'),
                                                                ('csv file', '*.csv')))
        print(csv_file_path)
        # create a dictionary
        data_dict = {}
        # Step 2
        # open a csv file handler
        with open(csv_file_path, encoding = 'utf-8') as csv_file_handler:
            csv_reader = csv.DictReader(csv_file_handler)
            # convert each row into a dictionary
            # and add the converted data to data_dict
            for rows in csv_reader:
                # assuming a column named 'id'
                # to be the primary key
                key = rows['id']
                data_dict[key] = rows
        # open a json file handler and use json.dumps
        # to dump the data
        # Step 3
        files = [('JSON Files', '*.json')]
        json_file_path = asksaveasfile(filetypes = files, defaultextension = files)
        with open(json_file_path.name, 'w', encoding = 'utf-8') as json_file_handler:
            # Step 4
            json_file_handler.write(json.dumps(data_dict, indent = 4))

# driver code
# be careful while providing the path of the csv file
# provide the file path relative to your machine
Manipulate_data().convert_csv_to_json()
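One common way to avoid the extra window (a minimal sketch, not tied to your exact setup): the filedialog functions create a default Tk root window if none exists yet, so create the root yourself and hide it with withdraw() before calling them, for example:

import tkinter as tk
from tkinter import filedialog

root = tk.Tk()
root.withdraw()    # hide the empty root window; only the dialogs will appear

csv_file_path = filedialog.askopenfilename(title = 'Select a CSV file',
                                           filetypes = (('csv file', '*.csv'),))
# ... convert the file as in the code above ...
root.destroy()     # clean up the hidden root when finished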

Related

How to get Python to ask for a directory, read each filename in the dir and place that filename (without .csv) in the same file respectively?

I'm very new to Python. I have several hundred folders, each with many thousands of files containing time series data. Each file has an anonymized unique filename followed by an underscore and a file number (0 through however many files are in the folder), e.g. "18667_0.csv, 18667_1.csv, 18667_3.csv". I need to run a for loop that takes this unique filename "18667" and places it in whatever cell I choose (I'm using Excel to read the csv) of that same csv file. So if I have 300 files in the folder, all of them will have the unique "18667", and I just want that number placed in each file. I have part of the code for the directory request, but I've not been successful in combining the right read/write statements to implement this task. Here's what I have:
import csv
import tkinter as tk
import pandas as pd
from tkinter import filedialog
from os import listdir

def find_csv_filenames(path_to_dir, suffix=".csv"):
    filenames = listdir(path_to_dir)
    return [filename for filename in filenames if filename.endswith(suffix)]

root = tk.Tk()
root.withdraw()
folder_path = filedialog.askdirectory()
all_csvfiles = find_csv_filenames(folder_path, suffix=".csv")
for filename in all_csvfiles:
    print(filename)
    a = filename
    with open(a, 'w', newline="") as file:
        csvwriter = csv.writer(file)  # 2. create a csvwriter object
        csvwriter.writerow(a)
Wound up doing this.
import tkinter as tk
from tkinter import filedialog
from os import listdir

# Listing all csv in folder
def find_csv_filenames(path_to_dir, suffix=".csv"):
    filenames = listdir(path_to_dir)  # returns all filenames
    return [filename for filename in filenames if filename.endswith(suffix)]
    # returns only csv filenames in a list []

# This is GUI for asking to select folder
root = tk.Tk()
root.withdraw()
folder_path = filedialog.askdirectory()  # Returns the folder you selected
all_csvfiles = find_csv_filenames(folder_path, suffix=".csv")  # lists filenames
print(folder_path)

for filename in all_csvfiles:  # begin loop thru all csv files
    print(filename)
    # a = filename
    InsertedName = filename[:-4]  # removes last 4 chars from filename
    print(InsertedName)
    tempfile = open(folder_path + "/temp.csv", "w")  # creates a tempfile for writing (w) in the same dir
    tempfile.write(InsertedName + "\n")  # will write filename to temp and instruct for newline
    sourcefile = open(folder_path + "/" + filename, "r")  # creates a sourcefile for reading original
    for line in sourcefile:  # begins loop for writing original data to tempfile
        tempfile.write(line)  # writes the data
    sourcefile.close()  # closes sourcefile
    Finalfile = open(folder_path + "/" + filename, "w")  # creating finalfile to overwrite existing file in dir
    tempfile.close()  # closes tempfile to return pointer to the top of file
    tempfile = open(folder_path + "/temp.csv", "r")  # opens tempfile to write tempfile data to finalfile
    for line in tempfile:  # loops the lines to write
        Finalfile.write(line)
    tempfile.close()
    Finalfile.close()
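For comparison, a shorter sketch of the same idea (prepend the base filename as the first line of each csv) that skips the temp file, assuming each file is small enough to read into memory:

import os
import tkinter as tk
from tkinter import filedialog

root = tk.Tk()
root.withdraw()
folder = filedialog.askdirectory()

for name in os.listdir(folder):
    if not name.endswith(".csv"):
        continue
    path = os.path.join(folder, name)
    with open(path, "r") as f:
        original = f.read()                   # keep the existing contents
    with open(path, "w") as f:
        f.write(name[:-4] + "\n" + original)  # prepend the base filename as the first line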

How to process csv to hdfs using apache nifi?

Hello guys, hope you're doing well!
I have some csv files I want to put in HDFS, and if a file already exists it should append its content to the existing content. I tried a script in Python but with no results:
import os
import pandas as pd
from os import path
import sys, json
import csv
from csv import writer, reader

data = json.load(sys.stdin)
technologies = ['KPI_2G_NPO', 'GPRS']
old_path = data["old.path"]
filename = data["filename"]
old_path = old_path.replace("C:\\Users\\12\\Desktop\\APACHE~1\\NIFI-1~1.1\\", "")
old_path = old_path.replace("/", "")
old_path_list = old_path.split('\\')

def append_list_as_row(file_name, list_of_elem):
    with open(file_name, 'a+', newline='') as write_obj:
        csv_writer = writer(write_obj)
        csv_writer.writerow(list_of_elem)

df = pd.read_csv(data["new.path"] + data["filename"])
columns = df.columns.values.tolist()
for tech in technologies:
    if (tech in filename and old_path_list[0] in filename):
        if path.exists("hdfs://quickstart.cloudera:8020/user/cloudera/data/" + tech + "_" + old_path_list[0] + ".csv"):
            header_saved = True
            with open(data["new.path"] + data["filename"]) as file2:
                header = next(file2)  # skip the header row
                if header_saved:
                    for line in file2:
                        append_list_as_row("hdfs://quickstart.cloudera:8020/user/cloudera/data/" + tech + "_" + old_path_list[0] + ".csv", list(line.split(",")))
            os.remove(data["new.path"] + data["filename"])
        else:
            df.to_csv("hdfs://quickstart.cloudera:8020/user/cloudera/data/" + tech + "_" + old_path_list[0] + ".csv")
            os.remove(data["new.path"] + data["filename"])
and here's my NiFi pipeline picture
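One thing worth noting about the script above: Python's built-in open() and os.path.exists() don't understand hdfs:// URLs, so those calls never actually touch HDFS. A minimal sketch of one workaround is to shell out to the standard hdfs dfs CLI instead (the paths below are only placeholders):

import subprocess

local_csv = "/tmp/KPI_2G_NPO_input.csv"            # placeholder local file
local_no_header = "/tmp/KPI_2G_NPO_no_header.csv"  # same data without the header row
hdfs_target = "/user/cloudera/data/KPI_2G_NPO_example.csv"  # placeholder HDFS path

# strip the header row so appended data doesn't repeat it
with open(local_csv) as src, open(local_no_header, "w") as dst:
    next(src)
    dst.writelines(src)

# hdfs dfs -appendToFile appends one or more local files to a file in HDFS
subprocess.run(["hdfs", "dfs", "-appendToFile", local_no_header, hdfs_target], check=True)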

How to Read CSV, Create a QR Code, and Write its Filename to New Column?

I'm writing a Python script to generate a QR code from the first column in a csv (concatenated with a local name), and that part works well. The csv just has three columns and looks like this:
ID First Last
144 Jerry Seinfeld
491 George Costanza
104 Elaine Benes
99 Cosmo Kramer
And I use my Python script to take that file, append a prefix to the IDs (in this case, 'NBC') and then create QR codes for each record in a new folder. It's a little long but all of this seems to work fine also:
import csv
import qrcode
import os
import shutil
import time
import inquirer

# Identify Timestamp
timestr = time.strftime("%Y%m%d-%H%M%S")
local = 'NBC'
# Load csv
filename = "stackoverflowtest.csv"
# Path to new local folder
localfolder = local
localimagefolder = localfolder + '/image'
localfilefolder = localfolder + '/files'
# Check/create folders based on local
if not os.path.exists(localfolder):
    os.makedirs(localfolder)
if not os.path.exists(localimagefolder):
    os.makedirs(localimagefolder)
if not os.path.exists(localfilefolder):
    os.makedirs(localfilefolder)
# Copy uploaded file to their local's file folder
shutil.copy2(filename, localfilefolder + '/' + local + '-' + timestr + '.csv')  # complete target filename given
# Read csv and generate QR code for local+first column of csv
with open(filename, 'rU') as csvfile:
    next(csvfile, None)  # skip header row
    reader = csv.reader(csvfile, delimiter=',', dialect=csv.excel_tab)
    for i, row in enumerate(reader):
        labeldata = row[0]  # Choose first column of data to create QR codes
        print labeldata
        qr = qrcode.QRCode(
            version=1,
            error_correction=qrcode.constants.ERROR_CORRECT_L,
            box_size=10,
            border=4,
        )
        qr.add_data(local + "-" + labeldata)
        qr.make()
        img = qr.make_image()
        img.save(localimagefolder + "/" + local + "-" + labeldata + ".png".format(i))  # Save image
It creates the NBC folder, copies the csv file into one subfolder, and creates the QR codes for each ID (NBC-144, NBC-491, NBC-104, NBC-99) in another.
The part where I'm running into a problem is opening the csv and writing the filepath/filename back to the csv (or to a copy of the csv, since from what I've read I likely can't do it to the same one). Is that possible?
The closest I've come with a script that works is appending the local name to the ID and writing that back as a column, but I can't seem to figure out how to do the same with a variable, let alone a filepath/filename:
import csv
import os
import sys

filename = 'stackoverflowtest.csv'
newfilename = 'stackoverflowtest2.csv'
local = 'NBC'

with open(filename, 'rU') as f:
    reader = csv.reader(f)
    with open(newfilename, 'w') as g:
        writer = csv.writer(g)
        for row in reader:
            new_row = row[0:] + ['-'.join([local, row[0]])]
            writer.writerow(new_row)
Is it possible to write something like that within my existing script to add a column for the filepath and filename? Everything I try breaks -- especially if I attempt to do it in the same script.
EDIT:
This is my closest attempt, which overwrote the existing file:
f = open(newfilename, 'r+')
w = csv.writer(f)
for path, dirs, files in os.walk(path):
    for filename in files:
        w.writerow([newfilename])
Also it's still in a separate script.
Since I can't run the code in your question directly, I had to comment out portions of it in what's below for testing, but I think it does everything you wanted in one loop in one script.
import csv
#import qrcode
import os
import shutil
import time
#import inquirer

# Identify Timestamp
timestr = time.strftime("%Y%m%d-%H%M%S")
local = 'NBC'
# Load csv
filename = "stackoverflowtest.csv"
# Path to new local folder
localfolder = local
localimagefolder = os.path.join(localfolder, 'image')
localfilefolder = os.path.join(localfolder, 'files')
# Check/create folders based on local
if not os.path.exists(localfolder):
    os.makedirs(localfolder)
if not os.path.exists(localimagefolder):
    os.makedirs(localimagefolder)
if not os.path.exists(localfilefolder):
    os.makedirs(localfilefolder)
# Copy uploaded file to their local's file folder
target = os.path.join(localfilefolder, local + '-' + timestr + '.csv')  # Target filename
#shutil.copy2(filename, target)  # Don't need to do this.
# Read csv and generate QR code for local+first column of csv
with open(filename, 'rb') as csvfile, open(target, 'wb') as outfile:
    reader = csv.reader(csvfile, delimiter=',', dialect=csv.excel_tab)
    writer = csv.writer(outfile, delimiter=',', dialect=csv.excel_tab)
    next(reader)  # Skip header row.
    for row in reader:
        id, first, last = row
#        qr = qrcode.QRCode(
#            version=1,
#            error_correction=qrcode.constants.ERROR_CORRECT_L,
#            box_size=10,
#            border=4,
#        )
#
#        qr.add_data(local+"-"+id)
#        qr.make()
#
#        img = qr.make_image()
        imagepath = os.path.join(localimagefolder, local + "-" + id + ".png")
#        img.save(imagepath)  # Save image.
        print "saving img:", imagepath
        writer.writerow(row + [local + '-' + id, imagepath])
Output from sample input data:
144,Jerry,Seinfeld,NBC-144,NBC/image/NBC-144.png
491,George,Costanza,NBC-491,NBC/image/NBC-491.png
104,Elaine,Benes,NBC-104,NBC/image/NBC-104.png
99,Cosmo,Kramer,NBC-99,NBC/image/NBC-99.png
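Note that the code above targets Python 2 (the print statement and binary-mode csv files). If you are on Python 3, a rough equivalent of just the csv part, with the qrcode calls omitted and the output filename made up for the example, would be:

import csv
import os

local = 'NBC'
filename = "stackoverflowtest.csv"            # same input as above
target = "stackoverflowtest_with_paths.csv"   # placeholder output name
localimagefolder = os.path.join(local, 'image')

with open(filename, newline='') as csvfile, open(target, 'w', newline='') as outfile:
    reader = csv.reader(csvfile)
    writer = csv.writer(outfile)
    next(reader)                              # skip the header row
    for row in reader:
        id_, first, last = row                # assumes three comma-separated columns
        imagepath = os.path.join(localimagefolder, local + "-" + id_ + ".png")
        print("saving img:", imagepath)
        writer.writerow(row + [local + '-' + id_, imagepath])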

Pyshp appends new fields but not record values to an existing shapefile

The problem consists in appending columns present in a .csv file as new fields to an existing shapefile. So I've used Python and the modules pyshp and csv to, first, copy the content of the original shapefile (geometries and records) and, second, create new fields in this copy and iterate over the respective .csv rows in order to insert them:
import os, sys
import shapefile, csv
from os.path import basename

filename_full = sys.argv[1]
output_full = sys.argv[2]
name, file_extension = os.path.splitext(filename_full)
output_name, file_extension = os.path.splitext(output_full)
filename_dbf = name + ".dbf"
filename_classified = name + "_classified.csv"
output_dbf = output_name + ".dbf"

# reader
myshp = open(filename_full, "rb")
mydbf = open(filename_dbf, "rb")
r = shapefile.Reader(shp=myshp, dbf=mydbf)
# writer
w = shapefile.Writer(r.shapeType)
# copy shapefiles content
w._shapes.extend(r.shapes())
w.records.extend(r.records())
w.fields = list(r.fields)
w.save(output_full)

# add new records from the csv
with open(filename_classified, 'rt', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=',')
    headers = reader.fieldnames
    [w.field(field) for field in headers]
    for row in reader:
        w.record(*tuple([row[f] for f in headers]))  # <-- insertion in specific fields
    w.save(output_full)
On the pyshp page there are a couple of examples; one of them is specific to inserting records into specific fields, as follows:
>>> w = shapefile.Writer()
>>> w.field('FIRST_FLD','C','40')
>>> w.field('SECOND_FLD','C','40')
>>> w.record('First', 'Line')
>>> w.record(FIRST_FLD='First', SECOND_FLD='Line')
but, even indicating the fields, I get:
Traceback (most recent call last):
  File "assigning-shapefile.py", line 68, in <module>
    w.record(*tuple([row[f] for f in headers]))
  File "/usr/local/lib/python3.5/dist-packages/shapefile.py", line 1040, in record
    record = [recordList[i] for i in range(fieldCount)]
  File "/usr/local/lib/python3.5/dist-packages/shapefile.py", line 1040, in <listcomp>
    record = [recordList[i] for i in range(fieldCount)]
IndexError: tuple index out of range
and, if we look inside the shapefile, we have something like this:
QGIS attribute table before and after the code execution
from which I concluded that the fields are successfully added, but the rows (w.record with the field names specified) are not.
Solved the problem using a quite simple approach with the osgeo library:
# --
# USAGE:
# python3 assigning-shapefile.py [input-shapefile] [output-shapefile]
# --
# REQUISITE:
# The classification csv file should be edited as a header of classifiers and its labels.
# The file name is mandatory to be IMAGE_NAME-classified.csv
# Ex:
# Filename: IMAGE_NAME-classified.csv
# Content:
# Random forest, Multilayer-Perc, CRF, SVM
# vegetation, vegetation, building, vegetation
# wall, window, window, window
# ...
# --
import os, sys
import shapefile, csv
from os.path import basename
from osgeo import ogr

filename_full = sys.argv[1]
output_full = sys.argv[2]
name, file_extension = os.path.splitext(filename_full)
output_name, file_extension = os.path.splitext(output_full)
filename_dbf = name + ".dbf"
filename_classified = name + "_classified.csv"
output_dbf = output_name + ".dbf"

myshp = open(filename_full, "rb")
mydbf = open(filename_dbf, "rb")
r = shapefile.Reader(shp=myshp, dbf=mydbf)
w = shapefile.Writer(r.shapeType)

# copy shapefile
w._shapes.extend(r.shapes())
w.records.extend(r.records())
w.fields = list(r.fields)
w.save(output_full)

# read the csv records
csvRecords = []
csvHeaders = []
with open(filename_classified, 'rt', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=',')
    csvHeaders = reader.fieldnames
    for line in reader:
        csvRecords.append(line)

driver = ogr.GetDriverByName('ESRI Shapefile')
infile = driver.Open(output_full, 1)

for classifier in csvHeaders:
    field = ogr.FieldDefn(classifier, ogr.OFTString)
    field.SetWidth(16)
    layer = infile.GetLayer()
    layer.CreateField(field)

cont = 0
for feature in layer:
    for classifier in csvHeaders:
        if (feature.GetField(0) != cont):
            cont += 1
        feature.SetField(classifier, csvRecords[cont][classifier])
    layer.SetFeature(feature)
infile = None
which is able to (i) read the csv file (with the columns to be added), (ii) read the shapefile and copy it, and (iii) modify the .shp copy by editing each row with the corresponding csv record.

Populate Excel from Python list

I want to populate an Excel sheet with data read from a text file.
My script opens a text file and then puts certain attributes into a list. I then want to populate my Excel sheet with the data in the list.
I have many txt documents in the same location, so my script loops through the files.
What is wrong with my code? It only populates one row.
import xlwt
import os

output = 'D:\Holding_Area\\test.xls'
file_path = 'Y:\\Testing\\Crashes'
pathappend = []
r = 1
for a in os.listdir(file_path):
    pathappend.append(file_path + '\\' + a)

def main():
    for x in pathappend:
        appendlist = []
        wbk = xlwt.Workbook()
        sheet = wbk.add_sheet('python', cell_overwrite_ok=True)
        file = open(x)
        lines = file.readlines()
        appendlist.append(lines[1][2:10])
        appendlist.append(lines[1][13:21])
        appendlist.append(lines[4][15:30])
        appendlist.append(lines[10][13:22])
        appendlist.append(lines[11][9:28])
        appendlist.append(lines[22])
        appendlist.append(lines[31][84:113])
        appendlist.append(lines[27:29])
        file.close()
        for i, e in enumerate(appendlist):
            sheet.write(r, i, e)
        r+1
        wbk.save(output)

main()
The issue was with 'r+1'; it should be 'r += 1', as shown below:
import xlwt
import os

output = 'D:\Holding_Area\\test.xls'
file_path = 'Y:\\Testing\\Crashes'
pathappend = []
r = 1
for a in os.listdir(file_path):
    pathappend.append(file_path + '\\' + a)

def main():
    for x in pathappend:
        appendlist = []
        wbk = xlwt.Workbook()
        sheet = wbk.add_sheet('python', cell_overwrite_ok=True)
        file = open(x)
        lines = file.readlines()
        appendlist.append(lines[1][2:10])
        appendlist.append(lines[1][13:21])
        appendlist.append(lines[4][15:30])
        appendlist.append(lines[10][13:22])
        appendlist.append(lines[11][9:28])
        appendlist.append(lines[22])
        appendlist.append(lines[31][84:113])
        appendlist.append(lines[27:29])
        file.close()
        for i, e in enumerate(appendlist):
            sheet.write(r, i, e)
        r += 1
        wbk.save(output)

main()
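As reconstructed, the corrected snippet still has two likely pitfalls: r is reassigned inside main() without a global r declaration (which raises UnboundLocalError), and a fresh workbook is created and saved for every input file, so only the last file's data survives. A sketch that keeps one workbook and one row counter for all files (only a few of the slices are repeated here for brevity):

import os
import xlwt

output = 'D:\\Holding_Area\\test.xls'
file_path = 'Y:\\Testing\\Crashes'

def main():
    wbk = xlwt.Workbook()
    sheet = wbk.add_sheet('python', cell_overwrite_ok=True)
    row = 0
    for name in os.listdir(file_path):
        with open(os.path.join(file_path, name)) as f:
            lines = f.readlines()
        # same kind of slicing as in the original; only a few shown
        values = [lines[1][2:10], lines[1][13:21], lines[4][15:30]]
        for col, value in enumerate(values):
            sheet.write(row, col, value)
        row += 1            # advance to the next row for the next file
    wbk.save(output)        # save once, after all files are written

main()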
