Miss calculation of change value from scrape data

Miss calculation of change value from scrape data - python

I'm working on a scraping project: I fetch values (datetime, open, close, high, low) from an API URL and write them to a MySQL database.
code
# Probe request: the first kline returned marks where the available history starts.
# (The query string needs a leading "?" and must not contain a stray quote.)
startTime_source = "https://api.binance.com/api/v1/klines?startTime=0&symbol=btcusdt&interval=1h&limit=1000"
startTime_source_get = requests.get(startTime_source)
startTime_source_json = json.loads(startTime_source_get.text)
# timestamp now, in milliseconds
import time
tsnw = int(time.time() * 1000.0)
# Close price of the candle handled just before the current one.  It is carried
# across batches: indexing data_list[z-1] at z == 0 would wrap around to the
# LAST candle of the batch and produce a bogus change value for the first row
# of every request.
prev_close = None
# Each request returns up to 1000 one-hour candles, so advance by 1000 h (in ms).
for y in range(startTime_source_json[0][0], tsnw, 3600000000):  # scrape data from start to end
    data = "https://api.binance.com/api/v1/klines?startTime=" + str(y) + "&symbol=" + coin[x] + "&interval=1h&limit=1000"
    data_get = requests.get(data)
    # convert class response to json list
    data_list = json.loads(data_get.text)
    for candle in data_list:
        # keep only the first five fields: open time, open, high, low, close
        del candle[5:]
        # convert the millisecond timestamp to a local datetime string
        timestamp = time.localtime(candle[0] / 1000.0)
        candle[0] = time.strftime("%Y-%m-%d %H:%M:%S", timestamp)
        # separate each feature
        dt_ = [candle[0]]      # datetime
        open_ = [candle[1]]    # open price
        close_ = [candle[4]]   # close price
        high_ = [candle[2]]    # high price
        low_ = [candle[3]]     # low price
        # percentage change of this close against the previous candle's close
        num_2 = float(candle[4])
        if prev_close:
            # round to 2 decimal digits, e.g. 0.00
            change_ = [round(((num_2 - prev_close) / prev_close) * 100, 2)]
        else:
            # very first candle (or a zero previous close): no change is defined
            change_ = [None]
        prev_close = num_2
        data_to_insert = [dt_ + open_ + close_ + high_ + low_ + change_]
        try:
            # insert value (the table name cannot be a bound parameter)
            write_data = "INSERT INTO" + " " + coin[x] + "(dt_,open_,close_,high_,low_,change_) VALUES (%s,%s,%s,%s,%s,%s)"
            mycursor.executemany(write_data, data_to_insert)
        except mysql.connector.Error as err:
            if err.sqlstate == "23000":
                # integrity violation, e.g. the row was already stored on an earlier run
                print('error')
            else:
                # anything else is a real failure and must not be hidden
                raise
The problem is that multiple rows have a wrong calculation. For example, for the datetime 2020-03-10 18:00:00 below, the change value should be 0.33, not -18.21. Why is that? Please help, thank you.

Related

ValueError: time data does not match

So I got this error raised
ValueError: time data '8/16/2016 9:55' does not match format '%m/&d/%Y
%H:%M'.
I know that %m is the format for a two-digit (zero-padded) month, and as we can see, '8' (August) is not zero-padded. Is that the cause of this error? And how do I fix it?
import datetime as dt
result_list = []
for a in ask_posts:
result_list.append([a[6], int(a[4])])
counts_by_hour = {}
comments_by_hour = {}
date_format = '%m/&d/%Y %H:%M'
for row in result_list:
date = row[0]
comment = row[1]
time = dt.datetime.strptime(date, date_format).strftime("%H")
``` I want to extract the Hour only```
if time not in counts_by_hour:
counts_by_hour[time] = 1
comments_by_hour[time] = comment
else:
counts_by_hour[time] += 1
comments_by_hours[time] += comment

You have an error in your date format: the day directive must be written with % (%d), not & (&d).
import datetime as dt

# Collect [created_at, comment_count] pairs from the posts.
result_list = []
for a in ask_posts:
    result_list.append([a[6], int(a[4])])
counts_by_hour = {}    # hour ("00".."23") -> number of posts
comments_by_hour = {}  # hour ("00".."23") -> total number of comments
date_format = '%m/%d/%Y %H:%M'  # change & with %
for row in result_list:
    date = row[0]
    comment = row[1]
    # I want to extract the Hour only
    time = dt.datetime.strptime(date, date_format).strftime("%H")
    if time not in counts_by_hour:
        counts_by_hour[time] = 1
        comments_by_hour[time] = comment
    else:
        counts_by_hour[time] += 1
        # was `comments_by_hours` (NameError): accumulate into the same dict
        comments_by_hour[time] += comment

process data on user input

I have a scenario where I am uploading a text file then providing a user input and then according to the user input processing further.
Sample file :
DOWN 07.09.2016 08:21:33 - 07.09.2016 08:23:33
UP 07.11.2016 09:41:07 - 09.11.2016 09:20:33
DOWN 09.11.2016 08:26:33 - 09.11.2016 08:46:33
UP 09.11.2016 08:23:33 - 09.11.2016 08:25:33
DOWN 09.11.2016 08:36:33 - 09.11.2016 08:41:33
DOWN 10.11.2016 08:36:33 - 10.11.2016 08:39:33
code :
try:
import Tkinter as Tk
import tkFileDialog as fileDialog
except ImportError:
import tkinter as Tk
import tkinter.filedialog as fileDialog
import datetime
def read_data():
    '''
    Ask for a file, parse it and store the result in the global `data`.

    Every non-blank line is expected to look like
    "DOWN 07.09.2016 08:21:33 - 07.09.2016 08:23:33".  `data` becomes a list
    of [direction, start datetime, end datetime, duration in seconds].
    Nothing changes when the file dialog is cancelled.
    '''
    global data
    filename = fileDialog.askopenfilename()
    if filename:
        # read all lines
        with open(filename) as fileHandle:
            lines = fileHandle.readlines()
        # convert to `datetime` (not `timestamp`)
        data = []
        for line in lines:
            # a blank (e.g. trailing) line would break the unpacking below
            if not line.strip():
                continue
            direction, d1, t1, _, d2, t2 = line.split()
            dt1 = d1 + ' ' + t1
            dt2 = d2 + ' ' + t2
            t1 = datetime.datetime.strptime(dt1, "%d.%m.%Y %H:%M:%S")
            t2 = datetime.datetime.strptime(dt2, "%d.%m.%Y %H:%M:%S")
            # `.seconds` is only the sub-day remainder; total_seconds() also
            # counts whole days for intervals longer than 24 hours
            seconds = int((t2 - t1).total_seconds())
            data.append([direction, t1, t2, seconds])
        print(data)
def processText(lines, selected_date):
    '''
    Sum the duration of all "DOWN" entries that start after `selected_date`.

    lines         -- iterable of (direction, start, end, seconds) records
    selected_date -- "dd.mm.yyyy" string; empty/None or an unparsable value
                     disables the date filter
    Returns (total minutes, formatted start of the first counted entry or None).
    '''
    total = 0
    start = None
    print(selected_date)
    # if there is `selected_date` then convert to `datetime`
    if selected_date:
        try:
            selected_date = datetime.datetime.strptime(selected_date, "%d.%m.%Y")
        except (ValueError, TypeError, AttributeError) as ex:
            # strptime raises ValueError for text that does not match the
            # format (and TypeError for non-strings); fall back to "no filter"
            print("ERROR:", ex)
            selected_date = None
    # calculate time
    for direction, t1, t2, seconds in lines:
        if direction == "DOWN":
            # if `selected_date` then filter times
            if selected_date and t1 <= selected_date:
                continue
            if not start:
                start = t1.strftime("%d.%m.%Y %H:%M:%S")
            total += seconds
    # convert to minutes after summing all seconds
    total = total // 60
    return total, start
def calculate():
    '''
    Button callback: report the downtime for every date typed into `entry`.

    The entry holds comma-separated "dd.mm.yyyy" dates; one result line per
    date is written to `textVar`.
    '''
    if data is None:
        # "Calculate" pressed before any file was opened: nothing to iterate
        textVar.set("No data loaded")
        return
    all_dates = entry.get().split(',')
    print(all_dates)
    all_dates = [date.strip() for date in all_dates]
    txt = ''
    for current_date in all_dates:
        down, start = processText(data, current_date)
        txt += "Total Downtime is {0} min from {1}\n".format(down, start)
    textVar.set(txt)
# --- main ---
# Module-level state shared by the callbacks: read_data() assigns it and
# calculate() passes it to processText().
data = None # to keep data from file
# - build the window: four widgets stacked in column 1, rows 1-4 -
root = Tk.Tk()
# row 1: button that runs read_data()
button = Tk.Button(root, text="Open", command=read_data)
button.grid(column=1, row=1)
# row 2: label bound to textVar, which calculate() fills with the result text
textVar = Tk.StringVar(root)
label = Tk.Label(root, textvariable=textVar)
label.grid(column=1, row=2)
# row 3: entry whose content calculate() splits on ','
entry = Tk.Entry(root)
entry.grid(column=1, row=3)
# row 4: button that runs calculate()
button2 = Tk.Button(root, text="Calculate", command=calculate)
button2.grid(column=1, row=4)
# hand control to the Tk event loop
root.mainloop()
Above code prompts me to select date in format Date1.Month1.Year1,Date2.Month2.Year2 ... (depending on the number input for date.)
and returns output as :
Total Downtime is x min from date1.month1.year1 xx:xx:xx(time1)
Total Downtime is y min from date2.month2.year2 yy:yy:yy(time2)
Here I have the details of downtime in mins and i want to convert that in percentage till date. For example ->
user input :
1.9.2016,1.11.2016,1.1.2016
Output :
Total Downtime is 30 min from 1.9.2016 08:21:33 & Availability percentage from selected date to till date : xx.xxx%
Total Downtime is 28 min from 1.11.2016 08:26:33 & Availability percentage from selected date to till date : yy.yyy%
Total Downtime is 30 min from 1.11.2016 08:26:33 & Availability percentage from selected date to till date : zz.zzz%
logic behind availability calculation would be
total number of min down from date(which is retrieved)/total number of min till date * 100
I am stuck in this part, Is this achievable? Any help would be great!!

If you run processText() with None instead of date then you get total number of minutes when it was down
total_down, start = processText(data, None)
and you can use it to calculate percentage.
percentage = (down/total_down) * 100
and you can use string formatting {:.2f} to display only two digits after dot
def calculate():
    '''
    Button callback: for each date typed into `entry`, report the downtime
    since that date and its share of the overall downtime.
    '''
    all_dates = entry.get().split(',')
    print(all_dates)
    all_dates = [date.strip() for date in all_dates]
    # calculate total number of minutes when it was down
    total_down, start = processText(data, None)  # <-- None
    print('total_down:', total_down)
    txt = ''
    for current_date in all_dates:
        down, start = processText(data, current_date)
        # calculate percentage; the float literal keeps this a true division
        # on Python 2 as well, and a zero total must not raise
        if total_down:
            percentage = 100.0 * down / total_down
        else:
            percentage = 0.0
        # use string formatting {:.2f} to display only two digits after dot
        txt += "Total Downtime is {} min from {} ({:.2f}%)\n".format(down, start, percentage)
    textVar.set(txt)
If you want total number of minutes when it was down or up then you have to change processText and add new parameter (ie. word) which will check if direction is DOWN or UP or both (word = None)
def processText(lines, selected_date, word="DOWN"):
    '''
    Sum the duration of the entries that start after `selected_date`.

    lines         -- iterable of (direction, start, end, seconds) records
    selected_date -- "dd.mm.yyyy" string; empty/None or an unparsable value
                     disables the date filter
    word          -- direction to count ("DOWN" or "UP"); None counts both
    Returns (total minutes, formatted start of the first counted entry or None).
    '''
    total = 0
    start = None
    print(selected_date)
    # if there is `selected_date` then convert to `datetime`
    if selected_date:
        try:
            selected_date = datetime.datetime.strptime(selected_date, "%d.%m.%Y")
        except (ValueError, TypeError, AttributeError) as ex:
            # strptime raises ValueError for text that does not match the
            # format (and TypeError for non-strings); fall back to "no filter"
            print("ERROR:", ex)
            selected_date = None
    # calculate time
    for direction, t1, t2, seconds in lines:
        if not word or word == direction:
            # if `selected_date` then filter times
            if selected_date and t1 <= selected_date:
                continue
            if not start:
                start = t1.strftime("%d.%m.%Y %H:%M:%S")
            total += seconds
    # convert to minutes after summing all seconds
    total = total // 60
    return total, start
def calculate():
    '''
    Button callback: for each date typed into `entry`, report the downtime
    since that date as a share of all recorded time (down and up together).
    '''
    all_dates = entry.get().split(',')
    print(all_dates)
    all_dates = [date.strip() for date in all_dates]
    # calculate total number of minutes when it was down and up
    total_down, start = processText(data, None, None)
    print('total_down:', total_down)
    txt = ''
    for current_date in all_dates:
        down, start = processText(data, current_date, "DOWN")
        # the float literal keeps this a true division on Python 2 as well,
        # and a zero total must not raise
        if total_down:
            percentage = 100.0 * down / total_down
        else:
            percentage = 0.0
        txt += "Total Downtime is {} min from {} ({:.2f}%)\n".format(down, start, percentage)
    textVar.set(txt)

Openpyxl Copy Time, returns -1

I am trying to create an Excel file that is the combination of multiple Excel files. However, when I copy a cell with the value 00:00 and append it to the master Excel file, Excel thinks the time belongs to the year 1899.
Here is my code:
def excel_graphs_all(day, users):
    """Combine the per-user workbooks of one day into a single chart workbook.

    For every user the first two columns of '<user[1]>_<day>.xlsx' are copied
    to a sheet of their own and plotted as a scatter chart on the
    '<day> Graphs' sheet, two charts per row.

    Returns the file name of the workbook that was written.
    """
    chart_wb = Workbook(write_only=True)
    graph_ws = chart_wb.create_sheet(day + ' Graphs', 0)
    chart_wb_filename = 'graphs_' + day + '.xlsx'
    columnNum = ['A', 'H']  # anchor columns of the left / right chart
    rowNum = 1
    for i, user in enumerate(users):
        filename = user[1] + '_' + day + '.xlsx'
        iter_wb = load_workbook(filename=filename, read_only=True)
        ws = iter_wb.active
        chart_ws = chart_wb.create_sheet(user[1])
        # Count the rows actually copied: in read-only mode max_row comes from
        # the file's stored dimensions, which may be missing or wrong.
        row_count = 0
        for row in ws.rows:
            chart_ws.append([row[0].value, row[1].value])
            row_count += 1
        # read-only workbooks load lazily and must be closed explicitly
        iter_wb.close()
        chart = ScatterChart()
        chart.title = user[1] + ' ' + day + ' Heartrate Data'
        chart.x_axis.title = 'Time'
        chart.y_axis.title = 'Heartrate'
        # axis limits 0..1 (Excel stores a time of day as a fraction of a day)
        chart.x_axis.scaling.min = 0
        chart.x_axis.scaling.max = 1
        xvalues = Reference(chart_ws, min_col=1, min_row=1, max_row=row_count)
        yvalues = Reference(chart_ws, min_col=2, min_row=1, max_row=row_count)
        series = Series(yvalues, xvalues, title='Heartrate')
        chart.series.append(series)
        spot = columnNum[i % 2] + str(rowNum)
        graph_ws.add_chart(chart, spot)
        # after every second chart, move down to the next row of charts
        if (i + 1) % 2 == 0:
            rowNum += 16
    chart_wb.save(chart_wb_filename)
    return chart_wb_filename
Thanks!

What do you mean by value 00:00? Excel uses formatting and not typing for dates and times. From the specification:
When using the 1900 date system, which has a base date of 30th
December 1899, a serial date- time of 1.5 represents midday on the
31st December 1899
It sounds like you just need to check the formatting for the relevant cells.

Using datetime package to convert a date with time to only date.

My function is to read data from a file that consists of dates with times a tweet was written, and sentiments (good, bad or neutral) it's classified as; select date with times, and sentiments between a start and end date; and finally create three dictionaries (positive, negative and neutral) that use the date as key, and number of positive, negative or neutral tweets made in a day.
The problems I have are:
a) How do I get only date to display, and not date and time?.
b) How do I get my program to include both start and end date?
c) How do I separate a key and value with a semi-colon in a dictionary?
def get_sentiment_dates(start_date, end_date):
    """Count tweets per day and sentiment between two dates (both inclusive).

    Reads "BAC2_answer.csv", whose lines look like
    "YYYY-MM-DD HH:MM:SS,<sentiment>[,...]".

    start_date, end_date -- "YYYY-MM-DD" strings
    Returns [positive, negative, neutral]: three dicts mapping a
    `datetime.date` to the number of 'Bullish', 'Bearish' and other tweets
    written on that day.
    """
    positive_dict = {}
    negative_dict = {}
    neutral_dict = {}
    # Compare calendar days, not datetimes: the end date parses to midnight,
    # so a datetime comparison would drop every tweet written during that day.
    start_day = datetime.strptime(start_date, '%Y-%m-%d').date()
    end_day = datetime.strptime(end_date, '%Y-%m-%d').date()
    # `with` closes the file even if a line fails to parse
    with open("BAC2_answer.csv", "r") as f:
        for line in f:
            if not line.strip():
                continue
            specs = line.split(',')
            day = datetime.strptime(specs[0], "%Y-%m-%d %H:%M:%S").date()
            if not (start_day <= day <= end_day):
                continue
            sentiment = specs[1].strip()
            if sentiment == 'Bullish':
                bucket = positive_dict
            elif sentiment == 'Bearish':
                bucket = negative_dict
            else:
                bucket = neutral_dict
            # one counter per day (the date is the key, the count the value)
            bucket[day] = bucket.get(day, 0) + 1
    return [positive_dict, negative_dict, neutral_dict]

Fatal Error (INFADI) Missing Directory - Multiprocessing python arcgis script error

I have written a script that uses pool.map to process multiple netCDF files and store information in a table. Each process runs a function to process one year. Each year has its own individual file geodatabase, table within that geodatabase, and mxd. I also set the default workspace and scratch workspace to that geodatabase. For example, when the function loads the year 1979 it accesses the 1979 geodatabase, the 1979 table within that geodatabase, and the 1979 mxd; 1980 would access the 1980 geodatabase, the 1980 table within that geodatabase, and the 1980 mxd.
If I run 1 process everything works fine. If I try to run 2 or more I get Fatal Error (INFADI) Missing Directory. Right now I'm running 6 processes. 4 Crash and the other 2 keep going without a problem.
Here is the code:
# Multiprocessing netCDF data into a table
######################################
import arcpy, calendar, datetime, numpy, multiprocessing, sys, re, timeit, os
from arcpy.sa import *
#Receive day and year and return the date str in MM/DD/YYYY
def getDate(day, year):
    """Return the date `day` days after January 1st of `year` as 'M/D/YYYY'.

    `day` is zero-based (0 is January 1st); month and day are not zero-padded.
    """
    date = datetime.datetime(year, 1, 1) + datetime.timedelta(day)
    return '{0}/{1}/{2}'.format(date.month, date.day, date.year)
#Main loop
#Receive a year int and process all dates within "good" months
# Process one year: for every day in a "good" month, select that day in the
# year's netCDF raster, clip it to each feature of the region layer, count the
# cells with value >= 7.2 and insert one row per feature into the year's table.
# Returns the caught exception object if anything fails, otherwise None.
def doCalc(year):
yearstr = str(year)
print('Starting doCalc: ' + yearstr)
############################################
#### CHANGE THIS INPUT ####
Species = 'Mallard'
Regiondb = 'North_America' #Spaces not allowed in filename to map
Region = 'Duck Zone' #Spaces allowed in DB
regionField = 'ZONE_NAME'
############################################
# One file geodatabase per year; used as both workspace and scratch workspace.
defaultGDB = "D:\\GIS\projects\\LCC_WSI_Climate\\year" + yearstr + ".gdb"
#Setting environmental variables
arcpy.env.workspace = defaultGDB
arcpy.env.scratchWorkspace = defaultGDB
arcpy.env.overwriteOutput = True
#desired months
goodmonth = (1, 2, 3, 9, 10, 11, 12)
#Acquire necessary extension and exit if it can't acquire
#Spatial Extension
# NOTE: sys.exit() raises SystemExit, which the bare except below also
# catches; the handler then exits again with the same message.
try:
if arcpy.CheckExtension("Spatial") == "Available":
arcpy.CheckOutExtension("Spatial")
print("Acquired Spatial license")
else:
sys.exit("No Spatial Analyst license available")
except:
sys.exit("No Spatial Analyst license available")
#Geostats Extension
try:
if arcpy.CheckExtension("GeoStats") == "Available":
arcpy.CheckOutExtension("GeoStats")
print("Acquired GeoStats license")
else:
sys.exit("No GeoStats license available")
except:
sys.exit("No GeoStats license available")
#Try and except statements currently used for debugging and that is why the excepts are not specific.
try:
#Select map document and set up layers. Using a map document because NetCDFRasters aren't
#playing nice if not "living" in a document
print('Starting :' + yearstr)
start = timeit.default_timer()
mxd = arcpy.mapping.MapDocument("D:/GIS/projects/LCC_WSI_Climate/python code/WSI_maps"+yearstr+".mxd")
df = arcpy.mapping.ListDataFrames(mxd)[0]
#Set the table to write to according to the year received
# NOTE: WSITable stays unbound if no table view is named 'T<year>'.
for table in arcpy.mapping.ListTableViews(mxd):
if table.name == 'T'+yearstr:
WSITable = table
#Set the Clip layer according to the Region specified above
for dflayer in arcpy.mapping.ListLayers(mxd,"", df):
if dflayer.name == Region:
WSIClip = dflayer
if dflayer.name == 'wsi_Layer':
WSILayer = dflayer
#Set directory where netCDF files reside
direct = "D:/GIS/projects/LCC_WSI_Climate/python code/wsi/"
#Set netCDF file according to year received
inputLayer = direct +'wsi.' + yearstr + '.nc'
#If it's 1979 it starts in September.
if year == 1979:
startday = 243
else:
startday = 0
#Make sure the wsi_Layer is the correct file.
arcpy.MakeNetCDFRasterLayer_md(inputLayer, "wsi", "x", "y", "wsi_Layer")
#Checks if the current year is a leap year
if calendar.isleap(year):
maxday = 366
else:
maxday = 365
#Cycle through every day within the year
for daycnt in range(startday, maxday):
# `day` is overwritten a few lines below; `sendday` is not used again in this function.
day = 0
sendday = daycnt+1
date = getDate(daycnt, year)
newdate = datetime.datetime(year, 1, 1) + datetime.timedelta(daycnt)
newdate = newdate.timetuple()
month = newdate[1]
day = newdate[2]
#If the month is not desired it will skip the day and continue with the next day
if month not in goodmonth:
continue
datestr = str(month) + '/' + str(day) + '/' + str(year)
print(datestr)
#Use the Select by Dimension tool to change the netCDF layer to the current date
WSILayerRas = Raster("wsi_Layer")
arcpy.SelectByDimension_md(WSILayerRas, [["time", date]],"BY_VALUE")
#Save the file in defaultGDB. Processing didn't work without saving.
WSILayerRas.save("Temp"+yearstr)
##########################################
## Regions
##
wsikm = 0
datalist = []
#Calculate time
time = 'time ' + str(date)
#Setup the cursor to write to the output Table defined above (taken from mxd).
cursorout = arcpy.da.InsertCursor(WSITable, ("CATEGORY", "STATE", "SUBCATEGORY", "DATE","SQKM", "SPECIES"))
#Setup search cursor to go through the input dataset and clip raster to the shape of each feature.
#Copy data to the output table
with arcpy.da.SearchCursor(WSIClip,(regionField, "SHAPE#", "STATE_NAME")) as cursorin:
for row in cursorin:
AOIname = row[0]
AOIshape = row[1]
AOIextent = AOIshape.extent
AOIstate = row[2]
#dealing with odd characters and spaces
AOIname = re.sub("\s+", "", AOIname)
AOIname = AOIname.strip()
AOIname = AOIname.replace("'", "")
AOIname = AOIname.replace("/", "_")
AOIstatea = re.sub("\s+", "", AOIstate)
#print('State: ' + AOIstate + ', AOI: ' + AOIname)
# Name of the per-feature temporary raster: <state>_<area>_<year>
savetemp = AOIstatea + '_' + AOIname + '_' + yearstr
#Process crashes running this try/except. The except doesn't catch it.
try:
deleteme = Raster(arcpy.gp.ExtractByMask_sa(WSILayerRas, AOIshape))
except:
continue
deleteme.save(savetemp)
#Add raster to an array for deletion later
datalist.append(deleteme)
#Convert the Extracted raster to a NumPy array and extract desired values
#by incrementing a counter and calculating area.
my_array = arcpy.RasterToNumPyArray(deleteme)
rows, cols = my_array.shape
countAOI = 0
wsikm = 0
#time calculation
# Count the cells whose value is at least 7.2 (Python 2 `xrange`).
for rowNum in xrange(rows):
for colNum in xrange(cols):
value = my_array.item(rowNum, colNum)
if value >= 7.2:
countAOI +=1
# NOTE(review): 1024 is presumably the area of one cell in sq km - confirm.
wsikm = countAOI * 1024
#write to the output Table
cursorout.insertRow((Region,AOIstate, AOIname, datestr, wsikm, Species))
#Cleanup the geodatabase
## print('Cleaning up')
arcpy.Delete_management(savetemp)
datasetList = arcpy.ListDatasets("Extract_W*", "Raster")
try:
for dataset in datasetList:
arcpy.Delete_management(dataset)
except:
continue
#attempts at fixing the error
deleteme = None
del cursorout
del cursorin
#Finish calculating time processing 1 entire year
stop = timeit.default_timer()
# Python 2 print statement
print stop - start
except Exception as e:
#print sys.exc_traceback.tb_lineno
# The exception is returned rather than raised, so the caller has to inspect
# the return value to notice a failure.
return e
####
# MAIN
####
if __name__ == '__main__':
    print('Starting script')
    #Start timing entire process
    start = timeit.default_timer()
    #Year Range
    #Entire dataset would be list(range(1979, 2013)); this is a sample
    yearlist = list(range(1979, 1986))
    #Create pool
    print("Creating pool")
    pool = multiprocessing.Pool(7)
    #Call doCalc and pass the year list
    results = pool.map(doCalc, yearlist)
    print("Closing pool")
    pool.close()
    print("Joining pool")
    pool.join()
    #doCalc returns the exception it caught instead of raising it, so a
    #failed year is only visible in the collected results
    for year, result in zip(yearlist, results):
        if isinstance(result, Exception):
            print('doCalc(' + str(year) + ') failed: ' + repr(result))
    stop = timeit.default_timer()
    #parenthesised form prints the same on Python 2 and Python 3
    print(stop - start)
    print("Complete")

The fix was found and posted http://forums.arcgis.com/threads/109606-Multiprocessing-script-errors-on-geoprocessing-line-of-code-INFADI-(Missing-Dir)?p=387987&posted=1#post387987
The trick is to set os.environ["TEMP"] as well as os.environ["TMP"] to a unique directory within each process.
def doCalc(year):
yearstr = str(year)
import time
time.sleep(1.1)
newTempDir = r"C:\temp\gptmpenvr_" + time.strftime('%Y%m%d%H%M%S') + yearstr
os.mkdir(newTempDir)
os.environ["TEMP"] = newTempDir
os.environ["TMP"] = newTempDir
print('Starting doCalc: ' + yearstr)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Miss calculation of change value from scrape data - python

Related

ValueError: time data does not match

process data on user input

Openpyxl Copy Time, returns -1

Using datetime package to convert a date with time to only date.

Fatal Error (INFADI) Missing Directory - Multiprocessing python arcgis script error

Categories

Resources