I am trying to write map/reduce code to compute the total number of entries per unit of the NYC subway.
My mapper code generates a .txt file, as required by the project.
import sys
def mapper():
    """Emit one ``UNIT<TAB>ENTRIESn_hourly`` record per turnstile data row.

    Reads comma-separated lines from ``sys.stdin`` and writes the key/value
    pairs to ``sys.stdout``.  Only lines with exactly 22 fields are
    considered.  The CSV header row (whose second field is the literal
    column name ``UNIT``) is skipped, so every emitted value is a data
    value rather than the column name ``ENTRIESn_hourly``.
    """
    expected_fields = 22
    unit_col = 1     # position of UNIT in the 22-column row
    entries_col = 6  # position of ENTRIESn_hourly in the 22-column row
    for line in sys.stdin:
        data = line.strip().split(",")
        if len(data) != expected_fields:
            continue
        unit, entries = data[unit_col], data[entries_col]
        if unit == "UNIT":
            # Header row: emitting it would hand the reducer a value that
            # cannot be converted to a number.
            continue
        # sys.stdout.write behaves the same on Python 2 and Python 3.
        sys.stdout.write("{0}\t{1}\n".format(unit, entries))
# Run the mapper with stdin/stdout temporarily redirected to files.  The
# files are closed (and the output flushed) when the ``with`` block exits,
# and the previous streams are restored afterwards so later output is not
# silently written into mapper_result.txt.
_saved_stdin, _saved_stdout = sys.stdin, sys.stdout
with open('turnstile_data_master_with_weather.csv') as _src, \
        open('mapper_result.txt', 'w') as _dst:
    sys.stdin, sys.stdout = _src, _dst
    try:
        mapper()
    finally:
        sys.stdin, sys.stdout = _saved_stdin, _saved_stdout
The file mapper_result.txt is correct: it is a two-column (key/value) file of the entries by unit of the NYC subway.
Then I wrote the reducer code to sum all the values by unit, as follows:
import sys
def reducer():
# Sums the second tab-separated field of each stdin line per key (the first
# field) and prints "key<TAB>total" whenever the key changes, so it relies on
# equal keys arriving on consecutive lines.
entriesTotal = 0
oldKey = None
for line in sys.stdin:
data = line.strip().split("\t")
# Skip lines that are not exactly "key<TAB>value".
if len(data) != 2:
continue
thisKey,thisEntry = data
# Key changed: print the finished key's total and start a new sum.
if oldKey and oldKey != thisKey:
print "{0}\t{1}".format(oldKey,entriesTotal)
entriesTotal = 0
oldKey = thisKey
# NOTE: float() raises ValueError for a non-numeric value; the traceback
# below shows this happening for the value "ENTRIESn_hourly".
entriesTotal += float(thisEntry)
# Print the total of the last key seen.
if oldKey != None:
print "{0}\t{1}".format(oldKey, entriesTotal)
# Redirect stdin/stdout to the mapper output and the result file, then run
# the reducer.
# NOTE(review): neither file object is closed and sys.stdout is not restored.
sys.stdin = open('mapper_result.txt')
sys.stdout = open('reducer_result.txt', 'w')
reducer()
ValueError Traceback (most recent call last)
<ipython-input-28-8ec50e7ee920> in <module>()
21 sys.stdin = open('mapper_result.txt')
22 sys.stdout = open('reducer_result.txt', 'w')
---> 23 reducer()
<ipython-input-28-8ec50e7ee920> in reducer()
15 entriesTotal = 0
16 oldKey = thisKey
---> 17 entriesTotal += float(thisEntry)
18 if oldKey != None:
19 print "{0}\t{1}".format(oldKey, entriesTotal)
ValueError: could not convert string to float: ENTRIESn_hourly
Maybe it is a problem converting the strings in the .txt file to floats.
Does anyone have an idea?
OK, I managed to finish it by wrapping the conversion of thisEntry in a try...except.
Here is the final code:
import sys
def reducer():
    """Sum the entries per key and write ``key<TAB>total`` lines.

    Reads ``key<TAB>value`` records from ``sys.stdin``.  Records for the
    same key must arrive on consecutive lines; a total is written to
    ``sys.stdout`` each time the key changes and once more at end of input.

    Lines that do not have exactly two tab-separated fields, or whose value
    is not a number (for example the ``UNIT<TAB>ENTRIESn_hourly`` header
    record), are ignored entirely: they neither start a key nor add
    anything to the output.
    """
    entriesTotal = 0
    oldKey = None
    for line in sys.stdin:
        data = line.strip().split("\t")
        if len(data) != 2:
            continue
        thisKey, thisEntry = data
        try:
            # Convert before touching any state, so a bad record can
            # neither become the current key nor end up in the output.
            entry = float(thisEntry)
        except ValueError:
            continue
        if oldKey is not None and oldKey != thisKey:
            sys.stdout.write("{0}\t{1}\n".format(oldKey, entriesTotal))
            entriesTotal = 0
        oldKey = thisKey
        entriesTotal += entry
    if oldKey is not None:
        sys.stdout.write("{0}\t{1}\n".format(oldKey, entriesTotal))
# Run the reducer with stdin/stdout temporarily redirected to files.  The
# ``with`` block closes both files (flushing the results), and the previous
# streams are restored afterwards so later output does not end up in
# reducer_result.txt.
_saved_stdin, _saved_stdout = sys.stdin, sys.stdout
with open('mapper_result.txt', 'r') as _src, \
        open('reducer_result.txt', 'w') as _dst:
    sys.stdin, sys.stdout = _src, _dst
    try:
        reducer()
    finally:
        sys.stdin, sys.stdout = _saved_stdin, _saved_stdout
Related
Can anybody advise what could be wrong with my code?
I am trying to write a function that removes the single-line comments from a file's content.
The function should also return the single-line comments that start with '#'.
import os
def deleteComments(file):
# Opens `file`, reads it, and writes the (still empty) string `clean` to a
# file named "clean-". Prints a message if anything in the try block fails
# and returns `file`.
try:
my_file = open(file, 'r')
data = my_file.read()
clean = ""
comment= 0
# NOTE(review): `i` is never assigned in this function and there is no loop
# over `data`; unless a global `i` exists this raises NameError, which the
# bare `except` below reports as a file-access error.
if i[0] == "#":
comment += 1
else:
pass
with open("clean-", "w") as f:
f.write(clean)
# NOTE(review): the `with` statement already closes `f`.
f.close()
my_file.close()
except:
print("An error occurred with accessing the files")
return file
def deleteComment(file):
# Same statements as deleteComments: reads `file`, writes the empty string
# `clean` to "clean-", prints a message on any failure and returns `file`.
try:
my_file = open(file, 'r')
data = my_file.read()
clean = ""
comment= 0
# NOTE(review): `i` is not defined in this function; nothing iterates over
# the lines of `data`.
if i[0] == "#":
comment += 1
else:
pass
with open("clean-", "w") as f:
f.write(clean)
f.close()
my_file.close()
except:
print("An error occurred with accessing the files")
return file
This should make it work.
import os
def deleteComments(file):
    """Strip full-line ``#`` comments from *file* and save the result.

    Every line of *file* whose first character is ``#`` is treated as a
    single-line comment and dropped; all other lines (including blank
    lines and lines with trailing comments) are written unchanged to
    ``clean-<basename of file>`` in the current working directory.

    Returns the number of comment lines removed.  If a file cannot be read
    or written, a message is printed and *file* is returned instead.
    """
    try:
        with open(file, 'r') as my_file:
            # keepends=True so the kept lines are written back verbatim.
            lines = my_file.read().splitlines(True)
        # startswith() is safe on empty strings, unlike indexing with [0].
        kept = [line for line in lines if not line.startswith("#")]
        comments_count = len(lines) - len(kept)
        name = os.path.basename(file)
        with open("clean-" + name, "w") as f:
            f.write("".join(kept))
        return comments_count
    except (IOError, OSError):
        print("An error occurred with accessing the files")
        return file
Why is the result not saved? Writing to RIPEMD160.txt gives an error.
I can see from the processor load that the code is running, but the file is empty.
I always get the same error:
IndentationError: unindent does not match any outer indentation level
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import base58
def count_lines(file):
    """Return the number of lines in the text file at path *file*."""
    # The ``with`` block closes the handle; previously it was left open.
    with open(file, 'r') as handle:
        return sum(1 for _ in handle)
def convert(file_in,file_out,nom):
    """Convert Base58Check addresses in *file_in* to hex lines in *file_out*.

    At most *nom* lines are read from *file_in*.  Each line is stripped and
    decoded with ``base58.b58decode_check``; the hex form of the decoded
    bytes, minus the first byte (first two hex digits), is appended to
    *file_out*, one value per line.  Lines that fail to decode are counted
    as errors and skipped.  A progress line is printed after every 100000
    converted addresses.
    """
    from itertools import islice

    print("===========File input -> " + file_in)
    print("===========File output -> " + file_out)
    converted = 0
    errors = 0
    next_report = 100000
    # ``with`` guarantees both files are closed, and the output flushed,
    # even if something unexpected is raised while converting.
    with open(file_in, 'r') as f, open(file_out, 'a') as fw:
        for raw in islice(f, max(nom, 0)):
            try:
                adr160 = base58.b58decode_check(raw.strip()).hex()[2:]
            except ValueError:
                # Raised for invalid characters or a bad checksum.
                errors += 1
                continue
            fw.write(adr160 + '\n')
            converted += 1
            if converted == next_report:
                print("Error - {} | Total line -> {}".format(errors, next_report), end='\r')
                next_report += 100000
    print("\n Finish")
if __name__ == "__main__":
    # Exactly two arguments are required: the input file and the output
    # file.  (Replaces the separate "< 3" and "> 3" checks.)
    if len(sys.argv) != 3:
        print("error")
        sys.exit(1)
    file_in = sys.argv[1]
    file_out = sys.argv[2]
    # The line count bounds how many lines convert() reads.
    line_count = count_lines(file_in)
    print("all lines -> " + str(line_count))
    convert(file_in, file_out, line_count)
    print('Finish')
This happens because you are not writing anything to the file, and because your code is not indented correctly.
import base58
# Base58 alphabet shared by the helpers below (no 0, O, I or l).
_B58_ALPHABET = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'


def base58_to_dec(addr):
    """Return the integer value of the Base58 string *addr*.

    Raises ValueError if *addr* contains a character that is not in the
    Base58 alphabet.
    """
    dec = 0
    for char in addr:
        dec = dec * 58 + _B58_ALPHABET.index(char)
    return dec


def dec_to_byte(dec):
    """Return the integer *dec* as big-endian hex, two digits per byte.

    ``0`` yields the empty string.  Only exact integer arithmetic is used,
    so arbitrarily large values convert without loss.

    Raises ValueError for negative input.
    """
    if dec < 0:
        raise ValueError("dec must be non-negative")
    out = ''
    while dec != 0:
        dec, remn = divmod(dec, 256)
        out = '{0:02x}'.format(remn) + out
    return out


def decode(addr):
    """Return the hex string encoded by the Base58 string *addr*.

    Leading ``1`` characters contribute nothing to the integer value, so
    any leading zero bytes they stand for do not appear in the result.
    """
    return dec_to_byte(base58_to_dec(addr))
# Decode every Base58Check address in addresses.txt and append the hex of
# the decoded bytes, without their first byte, to RIPEMD160.txt.
with open('addresses.txt', 'r') as f, \
        open('RIPEMD160.txt', 'a') as i:
    for addr in f:
        addr = addr.strip()  # drop the newline, which is not valid Base58
        if not addr:
            continue  # a blank line is not an address
        # b58decode_check returns bytes; bytes.hex() replaces the
        # Python 2-only .encode('hex').  The result is already hex, so it
        # is written as-is rather than being Base58-decoded a second time.
        ads = base58.b58decode_check(addr).hex()[2:]
        i.write(ads + '\n')
# No explicit close(): the ``with`` statement closes both files.
The process of generating a Bitcoin address is:
public_key => RIPEMD160(SHA256(public_key)) => version byte + hash + checksum => Base58Check address
So no other procedure is needed: simply reverse the Base58Check encoding to recover the RIPEMD-160 hash, as below.
import base58
# Append the hex of every decoded address in addresses.txt to
# RIPEMD160.txt, one per line.  Both files are closed by the ``with`` block.
with open('addresses.txt', 'r') as f, \
        open('RIPEMD160.txt', 'a') as i:  # append mode keeps earlier results
    for addr in f:
        addr = addr.strip()  # remove surrounding whitespace and the newline
        if not addr:
            continue  # a blank line is not an address
        # b58decode_check verifies the checksum and returns bytes.  On
        # Python 3 bytes has no .encode('hex'); bytes.hex() is the
        # equivalent.  [2:] drops the first byte of the decoded data.
        rmd160 = base58.b58decode_check(addr).hex()[2:]
        i.write(rmd160 + "\n")
Sorry if this is a repeat question - I'm not that good at Python, and the other answers haven't really helped me.
I have a python script: plink2treemix.py which is called from the command line like so:
plink2treemix.py data.vcf.frq.strat.gz treemix.gz
This is the script (it's long but there are two areas that are causing the issue, which I've hashtagged):
#!/usr/bin/python
import sys, os, gzip
# Script body: reads the gzipped input named by argv[1], collects per-(pop, rs)
# "mc total" strings in pop2rs, then writes a line of pop names followed by one
# "c1,c3" entry per pop for every rs to the gzipped output named by argv[2].
if len(sys.argv) < 3:
print("plink2treemix.py [gzipped input file] [gzipped output file]")
print("ERROR: improper command line")
exit(1)
infile = gzip.open(sys.argv[1], "rb")
outfile = gzip.open(sys.argv[2], "w")
pop2rs = dict()  # pop -> {rs: "mc total"}
rss = list()     # rs values in first-seen order
rss2 = set()     # the same rs values, for fast membership tests
# The first readline() result is overwritten by the second, so the first
# line of the input is skipped.
line = infile.readline()
line = infile.readline()
while line:
line = line.strip().split()
rs = line[1]
pop = line[2]
mc = line[6]
total = line[7]
if rs not in rss2:
rss.append(rs)
rss2.add(rs)
if pop not in pop2rs:
pop2rs[pop] = dict() #FIRST TYPE ERROR
# NOTE(review): the TypeError is raised by the next line. `(rs)` is just the
# string rs, and `a in b == 0` is a chained comparison (`a in b and b == 0`),
# so this asks whether a dict occurs inside a string. A test for "rs not yet
# stored for this pop" would presumably be `if rs not in pop2rs[pop]:`.
if pop2rs[pop] in (rs)==0:
pop2rs[pop][rs] = " ".join([mc, total])
line = infile.readline()
print("reached end of while loop")
pops = pop2rs.keys()
for pop in pops:
# NOTE(review): `print >> outfile, ...` is Python 2 statement syntax.
print >> outfile, pop,
# NOTE(review): this call does not write to outfile; it prints the outfile
# object and "" to standard output.
print(outfile, "")
print("printed outfile")
for rs in rss:
for pop in pops:
# NOTE(review): 'rs' is a string literal, not the loop variable rs, and
# `.split` is not called, so tmp is a bound method rather than a list.
tmp = pop2rs[pop]['rs'].split #SECOND TYPE ERROR
c1 = int(tmp[0])
c2 = int(tmp[1])
c3 = c2-c1
print >> outfile, ",".join([str(c1), str(c3)]),
print(outfile, "")
print("programme finished")
The first TypeError is:
line 31, in <module>
if pop2rs[pop] in (rs)==0:
TypeError: 'in <string>' requires string as left operand, not dict
I solved this by changing the dict() in the line above to str().
This results in a different TypeError later in the code:
tmp = pop2rs[pop][rs]
TypeError: string indices must be integers, not str
Which I'm not sure how to fix, as rs = line[1] of the input file and is already an int value, the header of the file is below:
CHR SNP CLST A1 A2 MAF MAC NCHROBS
22 22:17049382 0 T C 0 0 2
22 22:17049382 1 T C 0 0 2
22 22:17049382 2 T C 0 0 2
I'd be grateful for any help!
I have a pickle database whose records have five fields: rafTur, rafKat, rafNo, rafIndex and rafIndexData. I'm trying to delete a record from my pickle file. My main goal is to take input from the user like this:
rafTur = S rafKat = 1 rafNo = 2 rafIndex = 3
Then I want to find the record matching that input in my pickle file, named noSqlDB, and delete that entire record.
def delPic():
# Loads pickled objects from 'noSqlDB' one after another and compares each
# with the values typed in by the user; prints 'Record not Found' when no
# object matched.
infile = open('noSqlDB', 'rb+')
sistem = pickle.load(infile)
flag = False
rafTur = str(input('Rafın türünü giriniz : '))
rafKat = int(input('Rafın katını giriniz : '))
rafNo = int(input('Rafın Nosunu giriniz : '))
rafIndex = int(input('Rafın indexini giriniz : '))
# read to the end of file.
# NOTE(review): sistem is indexed with string keys below, so len(sistem)
# looks like the number of fields of one record, not the number of records
# in the file - confirm.
for x in range((len(sistem) + 1)):
try:
if (sistem['rafTur'].upper() == rafTur.upper() and sistem['rafKat'] == rafKat and sistem['rafNo'] == rafNo and sistem['rafIndex'] == rafIndex):
# NOTE: `del` only unbinds the local name; nothing in this function writes
# back to the file, so the stored data is unchanged.
del sistem
flag = True
sistem = pickle.load(infile)
except EOFError:
break
if flag == False:
print('Record not Found')
infile.close()
When I run this code nothing changes: the data stays in noSqlDB. How can I delete the data inside noSqlDB?
I tried changing
del sistem
to
del sistem['rafTur'],sistem['rafKat'], sistem['rafNo'],sistem['rafIndex'],sistem['rafIndexData']
def unpickle_database(filename):
    """Yield every object stored in the pickle file *filename*, in order.

    The file is read as a sequence of pickles written back to back;
    objects are loaded one at a time until the end of the file is reached.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    that come from a trusted source.
    """
    with open(filename, 'rb') as f:
        while True:
            try:
                yield pickle.load(f)
            except EOFError:
                # pickle.load signals end of file this way.
                break
def save_object(obj, filename,a):
    """Pickle *obj* into *filename*.

    When *a* is less than 1 the file is truncated first (mode ``'wb+'``);
    otherwise the pickle is appended to the existing content (``'ab+'``).
    """
    mode = 'wb+' if a < 1 else 'ab+'
    with open(filename, mode) as output:
        pickle.dump(obj, output, pickle.HIGHEST_PROTOCOL)
def delPic():
    """Delete the record the user identifies from the 'noSqlDB' pickle file.

    Prompts for rafTur, rafKat, rafNo and rafIndex, drops every stored
    record whose four fields match (rafTur is compared case-insensitively)
    and rewrites the file with the remaining records.  Prints
    'Record not Found' and leaves the file untouched when nothing matches.
    """
    rafTur = str(input('Rafın türünü giriniz : '))
    rafKat = int(input('Rafın katını giriniz : '))
    rafNo = int(input('Rafın Nosunu giriniz : '))
    rafIndex = int(input('Rafın indexini giriniz : '))

    def matches(record):
        return (record['rafTur'].upper() == rafTur.upper()
                and record['rafKat'] == rafKat
                and record['rafNo'] == rafNo
                and record['rafIndex'] == rafIndex)

    # Load everything first: the file cannot be rewritten while it is
    # still being read.
    records = list(unpickle_database('noSqlDB'))
    kept = [record for record in records if not matches(record)]
    if len(kept) == len(records):
        print('Record not Found')
        return
    # Rewrite in a single pass; opening with 'wb' truncates the file even
    # when no record is left to write.
    with open('noSqlDB', 'wb') as output:
        for record in kept:
            pickle.dump(record, output, pickle.HIGHEST_PROTOCOL)
I found a solution after trying some code; it is the code above.
I am using a script in Fusion 360 called importsplinecsv.
I was wondering whether it is possible to modify the script so that it imports only every 10th row,
as the number of rows being imported is very large and causes bloat.
If I could get some help, that would be awesome.
Here is the script:
#Author-Autodesk Inc.
#Description-Import spline from csv file
import adsk.core, adsk.fusion, traceback
import io
def run(context):
    """Import a fitted spline into the active Fusion design from a CSV file.

    Asks the user for a CSV file, turns the first three numeric fields of
    every row into a 3D point and adds one fitted spline through all the
    points to a new sketch on the root component's XY construction plane.
    Rows with fewer than three leading numeric fields are ignored.  Any
    exception is reported in a message box.
    """
    ui = None
    try:
        app = adsk.core.Application.get()
        ui = app.userInterface
        # Get all components in the active design.
        product = app.activeProduct
        design = adsk.fusion.Design.cast(product)
        title = 'Import Spline csv'
        if not design:
            ui.messageBox('No active Fusion design', title)
            return
        dlg = ui.createFileDialog()
        dlg.title = 'Open CSV File'
        dlg.filter = 'Comma Separated Values (*.csv);;All Files (*.*)'
        if dlg.showOpen() != adsk.core.DialogResults.DialogOK:
            return
        filename = dlg.filename
        # utf-8-sig strips a byte-order mark at the start of the file, if any.
        with io.open(filename, 'r', encoding='utf-8-sig') as f:
            points = adsk.core.ObjectCollection.create()
            for line in f:
                data = []
                for pntStr in line.split(','):
                    try:
                        data.append(float(pntStr))
                    except ValueError:
                        # Stop at the first field that is not a number.
                        break
                if len(data) >= 3:
                    point = adsk.core.Point3D.create(data[0], data[1], data[2])
                    points.add(point)
        if points.count:
            root = design.rootComponent
            sketch = root.sketches.add(root.xYConstructionPlane)
            sketch.sketchCurves.sketchFittedSplines.add(points)
        else:
            ui.messageBox('No valid points', title)
    except:
        # Top-level boundary of the script: show the failure to the user.
        if ui:
            ui.messageBox('Failed:\n{}'.format(traceback.format_exc()))
I have not used this library before but try:
for i, line in enumerate(f):
    # i is the zero-based row number, so this keeps rows 0, 10, 20, ...
    if i % 10 == 0:
        ...  # then your import command here
Here f is your file object,
i is the (zero-based) line number and line is the text of that line.
dlg = ui.createFileDialog()
dlg.title = 'Open CSV File'
dlg.filter = 'Comma Separated Values (*.csv);;All Files (*.*)'
if dlg.showOpen() != adsk.core.DialogResults.DialogOK :
return
filename = dlg.filename
with io.open(filename, 'r', encoding='utf-8-sig') as f:
    points = adsk.core.ObjectCollection.create()
    for i, line in enumerate(f):
        # Import only every 10th row (rows 0, 10, 20, ...).  The loop itself
        # advances the file, so no f.readline() call is needed; calling it
        # here would swallow the rows that are meant to be skipped over.
        if i % 10 != 0:
            continue
        data = []  # numeric fields of this row
        for pntStr in line.split(','):
            try:
                data.append(float(pntStr))
            except ValueError:
                # Stop at the first field that is not a number.
                break
        if len(data) >= 3:
            point = adsk.core.Point3D.create(data[0], data[1], data[2])
            points.add(point)
if points.count:
root = design.rootComponent
sketch = root.sketches.add(root.xYConstructionPlane)
sketch.sketchCurves.sketchFittedSplines.add(points)
else:
ui.messageBox('No valid points', title)
except:
if ui:
ui.messageBox('Failed:\n{}'.format(traceback.format_exc()))