Pick line in csv file to pull data

Pick line in csv file to pull data - python

I have a code that is working for me in a Cinema 4d project. It is able to read 4 different data points and kick out outputs to the main project. Currently it is reading all of the lines of the csv file and I would like to pick one line and pull the data from that line only.
import c4d
import csv
def main():
path = Spreadsheet #Spreadsheet is an input filename path
with open(path, 'rb') as csv_file:
readed = csv.DictReader(csv_file,delimiter=',')
for i, row in enumerate(readed):
try:
Xcord = float(row["hc_x"])
Ycord = float(row["hc_y"])
Langle = float(row["launch_angle"])
Lspeed = float(row["launch_speed"])
except:
print "Error while reading - {}".format(row)
continue
global Output1
global Output2
global Output3
global Output4
Output1 = Xcord
Output2 = Ycord
Output3 = Langle
Output4 = Lspeed
This is about the first thing I have tried to code. So thanks.

csv.DictReader requires that you open the file with newline="" in order for it to parse the file correctly.
with open(path, 'rb', newline="") as csv_file:
readed = csv.DictReader(csv_file,delimiter=',')
You also don't have any condition to stop reading the file.
row_to_stop = 5
for i, row in enumerate(readed):
if i == row_to_stop:
Xcord = float(row["hc_x"])
Ycord = float(row["hc_y"])
Langle = float(row["launch_angle"])
Lspeed = float(row["launch_speed"])
break
If you only care about one line, don't look up and type cast values until you reach the line you care about.

I would like to pick one line and pull the data from that line only
The code below will return specific line (by index). You will have to split it and grab the data.
def get_interesting_line(file_name: str, idx: int):
cnt = 0
with open(file_name) as f:
while cnt != idx:
f.readline()
cnt += 1
return f.readline().strip()
# usage example below
print(get_interesting_line('data.txt',7))

Related

How to search for a string in part of text?

I am trying to search multiple text files for the text "1-2","2-3","3-H" which occur in the last field of the lines of text that start with "play".
An example of the text file is show below
id,ARI201803290
version,2
info,visteam,COL
info,hometeam,ARI
info,site,PHO01
play,1,0,lemad001,22,CFBBX,HR/78/F
play,1,0,arenn001,20,BBX,S7/L+
play,1,0,stort001,12,SBCFC,K
play,1,0,gonzc001,02,SS>S,K
play,1,1,perad001,32,BTBBCX,S9/G
play,1,1,polla001,02,CSX,S7/L+.1-2
play,1,1,goldp001,32,SBFBBB,W.2-3;1-2
play,1,1,lambj001,00,X,D9/F+.3-H;2-H;1-3
play,1,1,avila001,31,BC*BBX,31/G.3-H;2-3
play,2,0,grayj003,12,CC*BS,K
play,2,1,dysoj001,31,BBCBX,43/G
play,2,1,corbp001,31,CBBBX,43/G
play,4,1,avila001,02,SC1>X,S8/L.1-2
For the text file above, I would like the output to be '4' since there are 4 occurrences of "1-2","2-3" and "3-H" in total.
The code I have got so far is below, however I'm not sure where to start with writing a line of code to do this function.
import os
input_folder = 'files' # path of folder containing the multiple text files
# create a list with file names
data_files = [os.path.join(input_folder, file) for file in
os.listdir(input_folder)]
# open csv file for writing
csv = open('myoutput.csv', 'w')
def write_to_csv(line):
print(line)
csv.write(line)
j=0 # initialise as 0
count_of_plate_appearances=0 # initialise as 0
for file in data_files:
with open(file, 'r') as f: # use context manager to open files
for line in f:
lines = f.readlines()
i=0
while i < len(lines):
temp_array = lines[i].rstrip().split(",")
if temp_array[0] == "id":
j=0
count_of_plate_appearances=0
game_id = temp_array[1]
awayteam = lines[i+2].rstrip().split(",")[2]
hometeam = lines[i+3].rstrip().split(",")[2]
date = lines[i+5].rstrip().split(",")[2]
for j in range(i+46,i+120,1): #only check for plate appearances this when temp_array[0] == "id"
temp_array2 = lines[j].rstrip().split(",") #create new array to check for plate apperances
if temp_array2[0] == "play" and temp_array2[2] == "1": # plate apperance occurs when these are true
count_of_plate_appearances=count_of_plate_appearances+1
#print(count_of_plate_appearances)
output_for_csv2=(game_id,date,hometeam, awayteam,str(count_of_plate_appearances))
print(output_for_csv2)
csv.write(','.join(output_for_csv2) + '\n')
i=i+1
else:
i=i+1
j=0
count_of_plate_appearances=0
#quit()
csv.close()
Any suggestions on how I can do this? Thanks in advance!

You can use regex, I put your text in a file called file.txt.
import re
a = ['1-2', '2-3', '3-H'] # What you want to count
find_this = re.compile('|'.join(a)) # Make search string
count = 0
with open('file.txt', 'r') as f:
for line in f.readlines():
count += len(find_this.findall(line)) # Each findall returns the list of things found
print(count) # 7
or a shorter solution: (Credit to wjandrea for hinting the use of a generator)
import re
a = ['1-2', '2-3', '3-H'] # What you want to count
find_this = re.compile('|'.join(a)) # Make search string
with open('file.txt', 'r') as f:
count = sum(len(find_this.findall(line)) for line in f)
print(count) # 7

read from line to line yelp dataset by python

I want to change this code to specifically read from line 1400001 to 1450000. What is modification?
file is composed of a single object type, one JSON-object per-line.
I want also to save the output to .csv file. what should I do?
revu=[]
with open("review.json", 'r',encoding="utf8") as f:
for line in f:
revu = json.loads(line[1400001:1450000)

If it is JSON per line:
revu=[]
with open("review.json", 'r',encoding="utf8") as f:
# expensive statement, depending on your filesize this might
# let you run out of memory
revu = [json.loads(s) for s in f.readlines()[1400001:1450000]]
if you do it on the /etc/passwd file it is easy to test (no json of course, so that is left out)
revu = []
with open("/etc/passwd", 'r') as f:
# expensive statement
revu = [s for s in f.readlines()[5:10]]
print(revu) # gives entry 5 to 10
Or you iterate over all lines, saving you from memory issues:
revu = []
with open("...", 'r') as f:
for i, line in enumerate(f):
if i >= 1400001 and i <= 1450000:
revu.append(json.loads(line))
# process revu
To CSV ...
import pandas as pd
import json
def mylines(filename, _from, _to):
with open(filename, encoding="utf8") as f:
for i, line in enumerate(f):
if i >= _from and i <= _to:
yield json.loads(line)
df = pd.DataFrame([r for r in mylines("review.json", 1400001, 1450000)])
df.to_csv("/tmp/whatever.csv")

Edit CSV file in python which reads values from another json file in python

I wanted to edit a csv file which reads the value from one of my another json file in python 2.7
my csv is : a.csv
a,b,c,d
,10,12,14
,11,14,15
my json file is a.json
{"a":20}
i want my where the column 'a' will try to match in json file. if their is a match. it should copy that value from json and paste it to my csv file and the final output of my csv file should be looks like this.
a,b,c,d
20,10,12,14
20,11,14,15
Till now I what I have tried is
fileCSV = open('a.csv', 'a')
fileJSON = open('a.json', 'r')
jsonData = fileJSON.json()
for k in range(jsonData):
for i in csvRow:
for j in jsonData.keys():
if i == j:
if self.count == 0:
self.data = jsonData[j]
self.count = 1
else:
self.data = self.data + "," + jsonData[j]
self.count = 0
fileCSV.write(self.data)
fileCSV.write("\n")
k += 1
fileCSV.close()
print("File created successfully")
I will be really thankful if anyone can help me for this.
please ignore any syntactical and indentation error.
Thank You.

Some basic string parsing will get you here.. I wrote a script which works for the simple scenario which you refer to.
check if this solves your problem:
import json
from collections import OrderedDict
def list_to_csv(listdat):
csv = ""
for val in listdat:
csv = csv+","+str(val)
return csv[1:]
lines = []
csvfile = "csvfile.csv"
outcsvfile = "outcsvfile.csv"
jsonfile = "jsonfile.json"
with open(csvfile, encoding='UTF-8') as a_file:
for line in a_file:
lines.append(line.strip())
columns = lines[0].split(",")
data = lines[1:]
whole_data = []
for row in data:
fields = row.split(",")
i = 0
rowData = OrderedDict()
for column in columns:
rowData[columns[i]] = fields[i]
i += 1
whole_data.append(rowData)
with open(jsonfile) as json_file:
jsondata = json.load(json_file)
keys = list(jsondata.keys())
for key in keys:
value = jsondata[key]
for each_row in whole_data:
each_row[key] = value
with open(outcsvfile, mode='w', encoding='UTF-8') as b_file:
b_file.write(list_to_csv(columns)+'\n')
for row_data in whole_data:
row_list = []
for ecolumn in columns:
row_list.append(row_data.get(ecolumn))
b_file.write(list_to_csv(row_list)+'\n')
CSV output is not written to the source file but to a different file.
The output file is also always truncated and written, hence the 'w' mode.

I would recommend using csv.DictReader and csv.DictWriter classes which will read into and out of python dicts. This would make it easier to modify the dict values that you read in from the JSON file.

Large text file to csv, can't open text file

I'm trying to convert this 3,1 GB text file from https://snap.stanford.edu/data/
into a csv file. All the data is structured like:
name: something
age: something
gender: something
which makes it a pretty large text file with some million lines.
I have tried to write a py script to convert it but for some reason it won't read the lines in my for each loop.
Here is the code:
import csv
def trycast(x):
try:
return float(x)
except:
try:
return int(x)
except:
return x
cols = ['product_productId', 'review_userId', 'review_profileName', 'review_helpfulness', 'review_score', 'review_time', 'review_summary', 'review_text']
f = open("movies.txt", "wb")
w = csv.writer(f)
w.writerow(cols)
doc = {}
with open('movies.txt') as infile:
for line in infile:
line = line.strip()
if line=="":
w.writerow([doc.get(col) for col in cols])
doc = {}
else:
idx = line.find(":")
key, value = tuple([line[:idx], line[idx+1:]])
key = key.strip().replace("/", "_").lower()
value = value.strip()
doc[key] = trycast(value)
f.close()
I'm not sure if it is because the document is to large, because a regulare notepad program won't be able to open it.
Thanks up front! :-)

In the line f = open("movies.txt", "wb") you're opening the file for writing, and thereby deleting all its content. Later on, you're trying to read from that same file. It probably works fine if you change the output filename. (I am not going to download 3.1 GB to test it. ;) )

Want to read multiple csv file one by one and filepaths are stored in a text file using python

here is my code for readinng individual cell of one csv file. but want to read multiple csv file one by one from .txt file where csv file paths are located.
import csv
ifile = open ("C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv", "rb")
data = list(csv.reader(ifile, delimiter = ';'))
REQ = []
RES = []
n = len(data)
for i in range(n):
x = data[i][1]
y = data[i][2]
REQ.append (x)
RES.append (y)
i += 1
for j in range(2,n):
try:
if REQ[j] != '' and RES[j]!= '': # ignore blank cell
print REQ[j], ' ', RES[j]
except:
pass
j += 1
And csv file paths are stored in a .txt file like
C:\Desktop\Test_Specification\RDBI.csv
C:\Desktop\Test_Specification\ECUreset.csv
C:\Desktop\Test_Specification\RDTC.csv
and so on..

You can read stuff stored in files into variables. And you can use variables with strings in them anywhere you can use a literal string. So...
with open('mytxtfile.txt', 'r') as txt_file:
for line in txt_file:
file_name = line.strip() # or was it trim()? I keep mixing them up
ifile = open(file_name, 'rb')
# ... the rest of your code goes here
Maybe we can fix this up a little...
import csv
with open('mytxtfile.txt', 'r') as txt_file:
for line in txt_file:
file_name = line.strip()
csv_file = csv.reader(open(file_name, 'rb', delimiter=';'))
for record in csv_file[1:]: # skip header row
req = record[1]
res = record[2]
if len(req + res):
print req, ' ', res

you just need to add a while which will read your file containing your list of files & paths upon your first open statement, for example
from __future__ import with_statement
with open("myfile_which_contains_file_path.txt") as f:
for line in f:
ifile = open(line, 'rb')
# here the rest of your code

You need to use a raw string string your path contains \
import csv
file_list = r"C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv"
with open(file_list) as f:
for line in f:
with open(line.strip(), 'rb') as the_file:
reader = csv.reader(the_file, delimiter=';')
for row in reader:
req,res = row[1:3]
if req and res:
print('{0} {1}'.format(req, res))

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Pick line in csv file to pull data - python

Related

How to search for a string in part of text?

read from line to line yelp dataset by python

Edit CSV file in python which reads values from another json file in python

Large text file to csv, can't open text file

Want to read multiple csv file one by one and filepaths are stored in a text file using python

Categories

Resources