numpy loadtxt ValueError: could not convert string to float: Id - python

I am not sure why I am getting the error mentioned.
Here is the code that calls np.loadtxt:
import numpy as np

def load(name):
    print("start reading file with target")
    wfile = open(name, "r")
    line = wfile.readline().replace("\n", "")
    print(line)
    splits = line.split(",")
    print(splits)
    datalen = len(splits)
    print(datalen)
    wfile.close()
    X = np.loadtxt(open(name), delimiter=',', usecols=range(0, datalen), skiprows=0)
    print("done")
    return np.array(X)
Here is a sample of the csv file. Note: I am not listing all of it, as there are 501 items in the csv file.
Id,asmSize,bytesSize,asmCompressionRate,bytesCompressionRate,ab_ratio,abc_ratio,ab2abc_ratio,sp_,...
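For what it's worth, the error message itself points at the header row: the first line of the csv ('Id,asmSize,...') is text, and with skiprows=0 np.loadtxt tries to parse 'Id' as a float. A minimal sketch of the likely fix, assuming that first line is the only header row:

import numpy as np

def load(name):
    # count the columns from the header line
    with open(name) as wfile:
        datalen = len(wfile.readline().rstrip("\n").split(","))
    # skiprows=1 skips the header, so 'Id' is never handed to float()
    return np.loadtxt(name, delimiter=',', usecols=range(datalen), skiprows=1)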

Related

Print text file line by line in an Excel Sheet

I am trying to read a text file line by line and then print it to an excel sheet line by line.
Here is what I have so far:
for x in ABC:
    print(f"{x}:")
    sheet1[cellLocLastRow('A')] = f"{x}:"
    try:
        with open(f"./{x}/Log.txt") as f:
            textRead = f.read()
            print(textRead)
            sheet1[cellLocLastRow('A')] = textRead
    except FileNotFoundError:
        print("File does not exist")
        sheet1[cellLocLastRow('A')] = "File does not exist"
It prints the text file out to the excel sheet, but all in one row, like this:
[screenshot 1]
But I would like my text file to be printed out like this:
[screenshot 2]
If you were wondering why I am using [cellLocLastRow('A')] instead of a literal like [A17]: I am printing out documents of unknown length into the excel sheet, so it counts the rows.
def cellLocLastRow(colChar):
    global lastRow
    curRow = lastRow
    lastRow += 1
    return cellLoc(colChar, curRow)
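cellLoc itself isn't shown in the post; a plausible stand-in, purely for illustration, would be:

def cellLoc(colChar, row):
    # hypothetical helper: build a cell reference like "A17"
    return f"{colChar}{row}"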
The text file format is as follows:
TestName: TestName
TestName: Info::Info::Info::Info::f###::##.#ns
Total Errors: #
TestName: Info::Info::Info::Info::f###::##.#ns
Total Errors: #
TestName: Info::Info::Info::Info::f###::##.#ns
Total Errors: #
Did you try the f.readlines() method?

with open(text, 'r') as f:
    content1 = f.readlines()

This will return a list of all the file's lines; then you can do whatever you want with them.
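For example, a minimal sketch of that idea against the code above (sheet1, cellLocLastRow, and the f-string path are the OP's helpers, so this is untested against their workbook): write one sheet row per text line instead of one row per file.

with open(f"./{x}/Log.txt") as f:
    for line in f.readlines():
        # one excel row per line of the log, not one cell for the whole file
        sheet1[cellLocLastRow('A')] = line.rstrip('\n')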
This is pretty easy with pylightxl:

pip install pylightxl

lines = []
with open("textfile.txt") as f:
    while True:
        line = f.readline()
        if not line:
            break
        lines.append(line)

import pylightxl as xl
db = xl.Database()
db.add_ws("Sheet1", {})
for i, line in enumerate(lines, start=1):
    db.ws("Sheet1").update_index(i, 1, line)
xl.writexl(db, "output.xlsx")

Extract information between strings from a file and write to csv

I would like to extract some info from a text file (between marker strings, e.g. oldtime: ... oldtime!>) and write it to a CSV file. My input text file is like this:
=======================
oldtime:
hours:1:hours!>
minutes:12:minutes!>
oldtime!>
newtime:
hours:15:hours!>
minutes:17:minutes!>
newtime!>
oldtime:
hours:11:hours!>
minutes:22:minutes!>
oldtime!>
newtime:
hours:5:hours!>
minutes:17:minutes!>
newtime!>
==========================
I started with this, but I cannot get any further.

import re

with open(inputfile, 'r') as f, open('outputfile.csv', 'a') as f1:
    f1.write("oldtime; newtime \n")
    for row in f:
        if "oldtime:" in str(row):
            temp = re.split(r'(#oldtime[\n\r]|[\n\r]#oldtime!>)', str(row))
            ???
        if "newtime:" in str(row):
            temp = re.split(r'(#newtime[\n\r]|[\n\r]#newtime!>)', str(row))
I would like to get as output a csv file like this:
oldtime newtime
01:12 15:17
11:22 05:17
Could you please help me? Thank you.
This is one approach, using regex and the csv module.
Ex:

import re
import csv

with open(filename) as infile, open(filename_1, "w") as outfile:
    data = infile.read()
    hrs = re.findall(r"hours:(\d+):hours", data)       # Get all hrs
    mins = re.findall(r"minutes:(\d+):minutes", data)  # Get all mins
    data = list(zip(hrs, mins))                        # list() so it can be sliced in Python 3
    writer = csv.writer(outfile)                       # Write CSV
    writer.writerow(["oldtime", "newtime"])            # Header
    for m, n in zip(data[0::2], data[1::2]):
        writer.writerow([":".join(m), ":".join(n)])    # Write old time & new time
Another solution, close to Rakesh's, assuming your file always has the same structure (oldtime -> hour -> min -> newtime -> hour -> min ...):

Extract all the numbers from the string with a regex: match = re.findall(r'\d+', str_file)
Convert this list by joining hours and minutes: dates = [i + ":" + j for i, j in zip(match[::2], match[1::2])]
Create a dataframe using the pandas module
Export the data
Here is the code:

# Import modules
import re
import pandas as pd

with open("../temp.txt", 'r') as f:
    # Read file as a string
    str_file = f.read()

# Extract all numbers
match = re.findall(r'\d+', str_file)
print(match)
# ['1', '12', '15', '17', '11', '22', '5', '17']

# Create dates
dates = [i + ":" + j for i, j in zip(match[::2], match[1::2])]
print(dates)
# ['1:12', '15:17', '11:22', '5:17']

# Create dataframe
df = pd.DataFrame({"oldtime": dates[::2],
                   "newtime": dates[1::2]})
print(df)
#   oldtime newtime
# 0    1:12   15:17
# 1   11:22    5:17

# Export the data
df.to_csv("output.csv", index=False)
EDIT 1:
Assuming that the oldtime and newtime blocks can be swapped: here I read the file line by line and categorise the oldtime and newtime values in a dictionary. There is a lot of slicing, but it works on my test file.
# Import module
import pandas as pd

with open("../temp.txt", 'r') as f:
    # Read file line by line
    list_split = ["oldtime:", "newtime:"]
    dates = {"oldtime:": [], "newtime:": []}
    line = f.readline().rstrip('\n')
    while True:
        line = line.rstrip('\n')
        print([line])
        if line in list_split:
            key = line
            hours = f.readline().rstrip('\n').split(":")[1]
            minutes = f.readline().rstrip('\n').split(":")[1]
            dates[key].append(hours + ':' + minutes)
        line = f.readline()
        if not line:
            break

print(dates)
# {'oldtime:': ['1:12', '11:22'], 'newtime:': ['15:17', '5:17']}

# Create dataframe
df = pd.DataFrame({"oldtime": dates["oldtime:"],
                   "newtime": dates["newtime:"]})
print(df)
#   oldtime newtime
# 0    1:12   15:17
# 1   11:22    5:17

# Export the data
df.to_csv("output.csv", index=False)
EDIT 2:

import pandas as pd

with open("../temp.txt", 'r') as f:
    list_split = ["oldtime:", "newtime:"]
    dates = {"oldtime": [], "newtime": []}
    line = f.readline().rstrip('\n')
    while True:
        # Ignore blank lines
        if ("oldtime:" in line) or ("newtime:" in line):
            # Process a new "oldtime" or "newtime" block
            # Class: either "oldtime" or "newtime"
            class_time = line.replace(" ", "").rstrip('\n')[:-1]
            # Default hour - minute values
            hours = "24"
            minutes = "60"
            # Read next line
            line = f.readline().rstrip('\n')
            # While block not ended
            while class_time + "!>" not in line:
                # If hour in line: update hour
                if 'hour' in line:
                    hours = line.split(":")[1]
                # If minute in line: update minute
                elif 'minute' in line:
                    minutes = line.split(":")[1]
                # Read next line
                line = f.readline().rstrip('\n')
            # End of block: add the time read to the dictionary
            dates[class_time].append(hours + ':' + minutes)
        # Read next line
        line = f.readline()
        # If end of file: exit
        if not line:
            break

# Create dataframe
df = pd.DataFrame({"oldtime": dates["oldtime"],
                   "newtime": dates["newtime"]})
# Export the data
df.to_csv("output.csv", index=False)
Hope that helps!
Great question :).
Here's a simple solution I did: splitting the string on the ":" character, turning the number strings into integers, combining them back with ":", and then writing them to a csv.
Here is the code:
import csv

with open('data.txt', 'r') as f:
    data = f.read()

# keep only the integer fields from the ':'-separated data
data = data.split(sep=':')
nums = []
for i in data:
    try:
        nums.append(int(i))
    except ValueError:
        pass

# pair up hours and minutes
times = []
for i in range(len(nums)):
    if i % 2 == 0:
        times.append(str(nums[i]) + ":" + str(nums[i + 1]))

with open('time_data.csv', 'w+', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['oldtime', 'newtime'])
    for i in range(len(times)):
        if i % 2 == 0:
            writer.writerow([times[i], times[i + 1]])
After reading Rakesh's answer, I wrote this:

import re
import csv

file_name = 'data.txt'
file_name1 = 'data_1.txt'

with open(file_name, 'r') as f, open(file_name1, 'w', newline='') as f1:
    data = f.read()
    list_1 = re.findall(r'hours:\d+:hours', data)
    list_2 = re.findall(r'minutes:\d+:minutes', data)
    list_i = ''
    for i in list_1:
        list_i += i
    list_2_i = ''
    for i in list_2:
        list_2_i += i
    list_1 = re.findall(r'\d+', list_i)
    list_2 = re.findall(r'\d+', list_2_i)
    data = []
    for i in range(len(list_1)):
        if i % 2 == 0:
            data.append([str(list_1[i]) + ':' + str(list_2[i]),
                         str(list_1[i + 1]) + ':' + str(list_2[i + 1])])
    writer = csv.writer(f1)
    writer.writerow(['oldtime', 'newtime'])
    for i in data:
        writer.writerow(i)
Also #Rakesh your code returned the error:
TypeError: 'zip' object is not subscriptable
Is there a way to fix this? :)
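For what it's worth: in Python 3, zip returns an iterator, which is not subscriptable, so slicing it raises exactly that TypeError. Materializing it into a list first fixes it:

data = list(zip(hrs, mins))  # a list supports the data[0::2] / data[1::2] slices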

Reading comma separated values from text file in python

I have a text file consisting of 100 records like
fname,lname,subj1,marks1,subj2,marks2,subj3,marks3.
I need to extract and print lname and marks1+marks2+marks3 in python. How do I do that?
I am a beginner in python.
Please help
When I used split, I got an error saying:

TypeError: Can't convert 'type' object to str implicitly

The code was:
import sys

file_name = sys.argv[1]
file = open(file_name, 'r')
for line in file:
    fname = str.split(str=",", num=line.count(str))
    print fname
If you want to do it that way, you were close. Is this what you were trying?
file = open(file_name, 'r')
for line in file.readlines():
    fname = line.rstrip().split(',')  # using rstrip to remove the \n
    print fname
Note: this is not tested code, but it tries to solve your problem. Please give it a try.

import csv

with open(file_name, 'rb') as csvfile:
    marksReader = csv.reader(csvfile)
    for row in marksReader:
        if len(row) < 8:  # 8 is the number of columns in your file.
            # row has some missing columns or is empty
            continue
        # Unpack the columns of the row; you can also do fname = row[0], lname = row[1] and so on ...
        (fname, lname, subj1, marks1, subj2, marks2, subj3, marks3) = row
        # you can use float in place of int if marks contain decimals
        totalMarks = int(marks1) + int(marks2) + int(marks3)
        print '%s %s scored: %s' % (fname, lname, totalMarks)

print 'End.'
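One caveat, in case you are on Python 3: opening the file in 'rb' mode is a Python 2 idiom, and csv.reader there expects a text-mode file. The open call would then become something like this, with the rest of the loop unchanged:

# Python 3 variant of the open call (assumption: same file layout as above)
with open(file_name, 'r', newline='') as csvfile:
    marksReader = csv.reader(csvfile)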
"""
sample file content
poohpool#signet.com; meixin_kok#hotmail.com; ngai_nicole#hotmail.com; isabelle_gal#hotmail.com; michelle-878#hotmail.com;
valerietan98#gmail.com; remuskan#hotmail.com; genevieve.goh#hotmail.com; poonzheng5798#yahoo.com; burgergirl96#hotmail.com;
insyirah_powergals#hotmail.com; little_princess-angel#hotmail.com; ifah_duff#hotmail.com; tweety_butt#hotmail.com;
choco_ela#hotmail.com; princessdyanah#hotmail.com;
"""
import pandas as pd
file = open('emaildump.txt', 'r')
for line in file.readlines():
fname = line.split(';') #using split to form a list
#print(fname)
df1 = pd.DataFrame(fname,columns=['Email'])
print(df1)

Text file parser outputs blank files

Long time listener, first time caller! So, I have this Python script for parsing a Google Base feed text file. It takes out particular pieces of data and creates a formatted file I can upload to Bing Shopping. After finally getting it to run, I've discovered that it just outputs blank files instead of the cleaned-up data I wanted. What am I missing here? I really appreciate any help! Fair warning: I'm a pretty big Python newb, and I've had a lot of help writing this already.
import sys, os
import pandas as pd
import datetime

def remove_quotes(data):
    lines = data.split('\n')
    for i, line in enumerate(lines):
        lines[i] = lines[i].replace('"', '')
        print lines[i]
    return data

def tab_error(index, line, output):
    count = len(line.split('\t'))
    if count != 19:
        err = 'Tab issue at line {linenum} : {numtabs} extra tabs'.\
            format(linenum=index, numtabs=(count - 19))
        print err
        output.write(err + '\n')
        return True
    return False

def html_error(index, line, output):
    htmltags = ['&fract12', '&39', '&', '&qt;', '<', '&rt;', '"', '>', 'quot', '’']
    for tag in htmltags:
        if line.find(tag) > 0:
            err = 'HTML issue at line {linenum}'.\
                format(linenum=index)
            print err
            output.write(err + '\n')
            return True
    return False

def read_data(filename):
    with open(filename, 'r') as infile:
        data = infile.read()
    return data

def tabs_check(data, output, filename):
    with open(filename, 'w') as cleanfile:
        header = ''
        for x in xrange(19):
            header += 'x' + str(x + 1) + '\t'
        cleanfile.write(header)
        # for each line in the file
        for i, line in enumerate(data.split('\r')[1:]):
            # check line for tabs error
            data_error = tab_error(i, line, output)
            newline = line.replace('"', '')
            newline = newline.strip()
            if not data_error:
                cleanfile.write('\n' + newline)

def html_check(data, output, filename):
    with open(filename, 'w') as cleanfile:
        # for each line in the file
        lines = data.split('\n')
        cleanfile.write(lines[0])
        for i, line in enumerate(lines[1:]):
            # check line for HTML errors
            data_error = html_error(i, line, output)
            newline = line.replace('"', '')
            newline = newline.strip()
            if not data_error and newline:
                cleanfile.write('\n' + newline)

if __name__ == '__main__':
    # Clean tabs
    filename = sys.argv[1]
    ts = datetime.datetime.now().isoformat()
    print ts
    with open('bing_errors.txt', 'w') as output:
        # print 'Removing quotes within .. product description and ...'
        # data = remove_quotes(data)
        print 'Removing lines with more than 19 tabs...'
        data = read_data(filename)
        tabs_check(data, output, 'clean19.txt')
        # Delete and reorder columns
        print 'Deleting and reordering columns...'
        df = pd.read_table('clean19.txt')
        tmp = df[['x8', 'x2', 'x3', 'x4', 'x6', 'x1', 'x5']]
        tmp.columns = ['MPID',
                       'Brand (BrandorManufacturer)',
                       'Title',
                       'Item Description',
                       'Price',
                       'ProductURL',
                       'ImageURL']
        tmp.to_csv('tmp.txt', index=False, sep='\t')
        os.remove('clean19.txt')
        # HTML errors
        print 'Checking for HTML errors...'
        data = read_data('tmp.txt')
        html_check(data, output, 'BT1.txt')
        os.remove('tmp.txt')
        # row = tmp[tmp['MPID'] == 8724]
        # print row
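One likely culprit, judging from the code alone: tabs_check splits the feed on '\r'. If the feed file actually uses '\n' (or '\r\n') line endings, data.split('\r') returns a single element, the [1:] slice then drops everything, and clean19.txt ends up holding only the header row, so every downstream file is blank. A minimal sketch of a fix, under that assumption:

# in tabs_check: split on any line ending rather than assuming bare '\r'
# (if the feed uses '\n', split('\r') yields one element and [1:] drops it all)
for i, line in enumerate(data.splitlines()[1:]):
    data_error = tab_error(i, line, output)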

Want to read multiple csv file one by one and filepaths are stored in a text file using python

Here is my code for reading individual cells of one csv file, but I want to read multiple csv files one by one, with the csv file paths stored in a .txt file.
import csv

ifile = open("C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv", "rb")
data = list(csv.reader(ifile, delimiter=';'))
REQ = []
RES = []
n = len(data)
for i in range(n):
    x = data[i][1]
    y = data[i][2]
    REQ.append(x)
    RES.append(y)
for j in range(2, n):
    try:
        if REQ[j] != '' and RES[j] != '':  # ignore blank cell
            print REQ[j], ' ', RES[j]
    except:
        pass
And the csv file paths are stored in a .txt file like:
C:\Desktop\Test_Specification\RDBI.csv
C:\Desktop\Test_Specification\ECUreset.csv
C:\Desktop\Test_Specification\RDTC.csv
and so on..
You can read stuff stored in files into variables. And you can use variables with strings in them anywhere you can use a literal string. So...
with open('mytxtfile.txt', 'r') as txt_file:
    for line in txt_file:
        file_name = line.strip()  # or was it trim()? I keep mixing them up
        ifile = open(file_name, 'rb')
        # ... the rest of your code goes here
Maybe we can fix this up a little...

import csv

with open('mytxtfile.txt', 'r') as txt_file:
    for line in txt_file:
        file_name = line.strip()
        csv_file = csv.reader(open(file_name, 'rb'), delimiter=';')
        next(csv_file)  # skip header row
        for record in csv_file:
            req = record[1]
            res = record[2]
            if len(req + res):
                print req, ' ', res
You just need to add a loop that reads your file containing the list of file paths before your first open statement, for example:

from __future__ import with_statement

with open("myfile_which_contains_file_path.txt") as f:
    for line in f:
        ifile = open(line.strip(), 'rb')  # strip the newline before using the path
        # here the rest of your code
You need to use a raw string, since your path contains \

import csv

file_list = r"C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv"

with open(file_list) as f:
    for line in f:
        with open(line.strip(), 'rb') as the_file:
            reader = csv.reader(the_file, delimiter=';')
            for row in reader:
                req, res = row[1:3]
                if req and res:
                    print('{0} {1}'.format(req, res))
