Python Pandas Highlight single signs possible? - python

I am really new to the whole python development and have a question. I would like to achieve the following result:
However, in my research I only found the possibility to change the style for a whole cell.
I am doing a complete character-by-character comparison and would like to colour the individual characters that differ.
Maybe this can also be done with Python (my VBA script is very slow).
This is my python script till now:
import pandas as pd
import numpy as np
# Location of the pipe-separated comparison export.
path = "XXXXX"
data = pd.read_csv(path, names=["Dir1", "Dir2", "File1", "File2",
                                "Diff", "Line1", "A", "Line2", "B"], sep="|")

# Compare columns A and B row by row. Only rows whose values differ are
# examined character by character and reported.
for ind in data.index:
    if data.A[ind] == data.B[ind]:
        var_ok = True
    else:
        # Work just with different values
        var_ok = False
        text_A = str(data.A[ind])
        text_B = str(data.B[ind])
        var_length_A = len(text_A)
        var_length_B = len(text_B)
        # Walk as far as the longer of the two values.
        var_longer = max(var_length_A, var_length_B)
        # Python has no VBA-style mid() and indexes strings from 0. A
        # one-character slice yields "" past the end of the shorter value,
        # so surplus characters of the longer value count as differences.
        var_diff_positions = [
            count
            for count in range(var_longer)
            if text_A[count:count + 1] != text_B[count:count + 1]
        ]
        # var_diff_positions holds the 0-based indices that need highlighting.
        print([ind], "|\t", data.A[ind], "|\t", data.B[ind], "|\t",
              var_ok, "|\t", var_length_A, "|\t", var_length_B, "|\t",
              var_longer, "|\t", var_diff_positions)
This is a part from my VBA script:
'If both cells are filled
Else
counter = 1
'Character-by-character comparison
For counter = counter To leng
'Characters that differ at this position get colour var2 and font style "Fett" in both cells
If Mid(Cells(zeile, Spalte1), counter, 1) <> Mid(Cells(zeile, Spalte2), counter, 1) Then
With Cells(zeile, Spalte2).Characters(start:=counter, Length:=1).Font
.Color = var2
.FontStyle = "Fett"
End With
With Cells(zeile, Spalte1).Characters(start:=counter, Length:=1).Font
.Color = var2
.FontStyle = "Fett"
End With
'Matching characters get colour var1 and font style "Standard" in both cells
Else
With Cells(zeile, Spalte2).Characters(start:=counter, Length:=1).Font
.Color = var1
.FontStyle = "Standard"
End With
With Cells(zeile, Spalte1).Characters(start:=counter, Length:=1).Font
.Color = var1
.FontStyle = "Standard"
End With
End If
Next
End If
End If
BR & thank you :)
Marcel

Related

Depth first search algorithm skipping spaces in maze?

After concluding the first lecture of Harvard's AI course on edX, I have decided to implement the concepts taught, first being the depth-first search algorithm.
The objective of this program is to input a maze in text file mazefile and find a path from S to G using the depth-first search algorithm.
The project currently consists of 4 files, (1) the code with the class methods to operate or use the (2) text file which contains the maze, another text file (3) that contains the result file (where the AI has explored) and the main python script (4). Here they are, feel free to copy and paste these into a folder and to see how they run.
processText.py (file 1)
#code to process the mazefile file.
class importMaze:
    """Read a maze text file and extract the rows between START and END.

    The file may contain arbitrary text before a line reading exactly
    ``START`` and after a line reading exactly ``END``; only the lines in
    between are treated as maze rows.
    """

    def __init__(self, maze):
        """Remember the path of the maze file; nothing is read yet."""
        self.fileLines = []
        self.fileName = maze
        self.switch = False
        self.toBeReturned = []

    def processThis(self):
        """Load the file and return the maze rows as a list of strings.

        Each call re-reads the file and rebuilds the result from scratch, so
        calling it repeatedly does not accumulate duplicate rows.
        """
        # Strip only the newline itself so a final line that lacks a trailing
        # newline keeps its last character.
        with open(self.fileName, "r") as f:
            self.fileLines = [line.rstrip("\n") for line in f]

        self.switch = False
        self.toBeReturned = []
        for row in self.fileLines:
            if self.switch:
                if row == "END":
                    self.switch = False
                else:
                    self.toBeReturned.append(row)
            elif row == "START":
                self.switch = True
        return self.toBeReturned
class mazePointer:
    """Cursor over a maze given as a list of row strings.

    Positions are ``[x, y]`` lists where ``x`` indexes into a row and ``y``
    selects the row; ``[0, 0]`` is the top-left cell.
    """

    def __init__(self, mazearray):
        """Store the maze and place the pointer on the cell marked ``S``."""
        self.Sample = mazearray
        self.initialPosition = []
        for y in range(0, len(self.Sample)):
            for x in range(0, len(self.Sample[y])):
                if str(self.Sample[y][x]) == "S":
                    self.initialPosition = [x, y]
        self.currentPosition = self.initialPosition

    def whatIs(self, xcoordinate, ycoordinate):
        """Return the character stored at the given coordinates."""
        return (self.Sample[ycoordinate])[xcoordinate]

    def nearbyFreeSpaces(self, search):
        """Return the neighbours of the current cell that contain ``search``.

        Neighbours are checked in the order left, right, above, below and
        returned as ``[x, y]`` lists. Cells outside the maze are skipped.
        """
        x, y = self.currentPosition
        self.freeSpaces = []
        # The neighbour below must keep the current x coordinate; using the
        # y coordinate for both axes examines an unrelated cell.
        for nx, ny in ((x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)):
            # Negative indices would silently wrap to the opposite edge.
            if ny < 0 or ny >= len(self.Sample):
                continue
            if nx < 0 or nx >= len(self.Sample[ny]):
                continue
            if self.whatIs(nx, ny) == search:
                self.freeSpaces.append([nx, ny])
        return self.freeSpaces

    def moveTo(self, position):
        """Move the pointer to ``position`` (an ``[x, y]`` list)."""
        self.currentPosition = position
TestingTrack.py (the main file)
from processText import importMaze, mazePointer
# Load the maze rows from the text file named "mazefile".
testObject = importMaze("mazefile")
environment = testObject.processThis()
# Pointer object built from the loaded maze rows.
finger = mazePointer(environment)
# Positions queued for a later visit; the main loop takes them from the end of the list.
frontier = []
# Positions that have already been visited.
explored = []
# Stays "" until Search() stores the goal position in it.
result = ""
def Search():
    """Inspect the neighbours of the pointer's current cell.

    When exactly one neighbouring cell holds the goal "G", its position is
    stored in the global ``result`` and the current position is appended to
    ``explored``. Otherwise every neighbouring "F" cell that is not yet in
    ``explored`` is appended to ``frontier``.
    """
    global result
    goalNeighbours = finger.nearbyFreeSpaces("G")
    if len(goalNeighbours) != 1:
        # Goal not adjacent: queue the free neighbours not visited so far.
        for candidate in finger.nearbyFreeSpaces("F"):
            if candidate not in explored:
                frontier.append(candidate)
        return
    # The goal is bordering this space.
    result = goalNeighbours[0]
    explored.append(finger.currentPosition)
# Depth-first exploration: visit the current cell, queue its unvisited free
# neighbours, then continue from the most recently queued position.
while result == "":
    explored.append(finger.currentPosition)
    Search()
    if result != "":
        # Goal found; moving on would read from a possibly empty frontier.
        break
    if not frontier:
        # Nothing left to explore and no goal reached: stop instead of
        # failing with an IndexError on frontier[-1].
        break
    finger.moveTo(frontier.pop())

# Recreates the maze, filling in 'E' where the AI has visited.
exploredArray = [
    "".join(
        "E" if [x, y] in explored else str(cell)
        for x, cell in enumerate(row)
    )
    for y, row in enumerate(environment)
]
def createResult(mazeList, title, append):  # Creating the file
    """Write the annotated maze and the original maze to "resultfile".

    mazeList -- rows of the maze with visited cells marked; written first.
    title    -- heading placed at the top of the file.
    append   -- mode string passed to open() (for example "w" or "a").

    The original maze rows are read from the module-level ``environment``.
    """
    parts = [title + " \n F - Free \n O - Occupied \n S - Starting point \n G - Goal \n E - Explored/Visited \n (Abdulaziz Albastaki 2020) \n \n (top left coordinate - 0,0) \n "]
    # Use the rows that were passed in rather than a module-level list, so the
    # mazeList argument actually controls what is written.
    parts.extend("\n" + str(row) for row in mazeList)
    parts.append("\n \n Original problem \n")
    parts.extend("\n" + str(row) for row in environment)
    # The with-block closes the file even when the write fails.
    with open("resultfile", append) as handle:
        handle.write("".join(parts))
# Unfinished helper; it is not called anywhere in the visible script.
# NOTE(review): finger.moveTo() is called below without the position argument
# that mazePointer.moveTo takes, so running this function would raise a TypeError.
def tracingPath():
initialExplored = explored
proceed = True
newExplored = []
for i in explored:
finger.moveTo() #incomplete
# Print the visited positions, then write the annotated maze to "resultfile".
print(explored)
createResult(exploredArray,"DEPTH FIRST SEARCH", "w")
mazefile (the program will read this file to get the maze)
F - Free
O - Occupied
S - Starting point
G - Goal
(Abdulaziz Albastaki 2020)
START
OOOOOOOOOOOOOOOO
OFFFFFFFFFFFFFGO
OFOOOOOOOOOOOOFO
OFOOOOOOOOOOOOFO
OFOOOOOOOOOOOOFO
OFOOOOOOOOOOOOFO
OSFFFFFFFFFFFFFO
OOOOOOOOOOOOOOOO
END
Made by Abdulaziz Albastaki in October 2020
You can change the maze and its size however it must
-Respect the key above
-Have ONE Starting point and goal
-The maze must be in between 'START' and 'END'
-The maze MUST be surrounded by occupied space
SAMPLE PROBLEMS:
OOOOOOOOOOOOOOOO
OFFFFFFFFFFFFFGO
OFOOOOOOOOOOOOFO
OFOOOOOOOOOOOOFO
OFOOOOOOOOOOOOFO
OFOOOOOOOOOOOOFO
OSFFFFFFFFFFFFFO
OOOOOOOOOOOOOOOO
OOOOOOOOOOOOOOOOO
OFOFFFFFOOOFFFOOO
OFFFOOOFOOFFOOOFO
OFOOOOOFOOFOOOOFO
OSFGFFFFFFFFFFFFO
OOOOOOOOOOOOOOOOO
There is also a resultfile, however if you would just create an empty textfile with that name (no extension), the program will fill it in with results.
The problem is with the resultfile, here it is:
DEPTH FIRST SEARCH
F - Free
O - Occupied
S - Starting point
G - Goal
E - Explored/Visited
(Abdulaziz Albastaki 2020)
(top left coordinate - 0,0)
OOOOOOOOOOOOOOOO
OFFFFFFFFFFFFFGO
OFOOOOOOOOOOOOEO
OFOOOOOOOOOOOOEO
OFOOOOOOOOOOOOEO
OEOOOOOOOOOOOOEO
OEFFFEEEEEEEEEEO
OOOOOOOOOOOOOOOO
Original problem
OOOOOOOOOOOOOOOO
OFFFFFFFFFFFFFGO
OFOOOOOOOOOOOOFO
OFOOOOOOOOOOOOFO
OFOOOOOOOOOOOOFO
OFOOOOOOOOOOOOFO
OSFFFFFFFFFFFFFO
OOOOOOOOOOOOOOOO
The AI skipped a few spaces to get to the goal, why is it doing so?
Feel free to ask me for any clarifications.
There are the following issues:
the last if block in nearbyFreeSpaces uses a wrong index:
if self.whatIs(self.currentPosition[1],self.currentPosition[1]+1) == search:
self.freeSpaces.append([self.currentPosition[1],self.currentPosition[1]+1])
should be:
if self.whatIs(self.currentPosition[0],self.currentPosition[1]+1) == search:
self.freeSpaces.append([self.currentPosition[0],self.currentPosition[1]+1])
The final position is not correctly added to the path. The last line of this block:
if len(finger.nearbyFreeSpaces("G")) == 1: #If the goal is bordering this space
result = finger.nearbyFreeSpaces("G")[0]
explored.append(finger.currentPosition)
...should be:
explored.append(result)

Cyclic relations between nodes A>B>C>A

This is what the file looks like before and after, for the problem I am trying to solve using Python. I have been trying for weeks, and I am failing miserably to tell Python to do the following:
STEP1: If you find on this document: "LinkedTo=" * (Example value: Node_3)*
STEP2: Then Stop
STEP3: Go to the previous NodePosX= and copy the value * (Example value: 10)*
STEP4: Go to the previous NodePosY= and copy the value * (Example value: 100)*
STEP5: Then find the next "Node_3" on the document
STEP6: And replace inside the NodePosX=30 and NodePosY=300 for the copied values 10 and 100
STEP7: Then look for the next "LinkedTo=" * (Example value: Node_5)* and repeat the STEP2 to STEP5
This is how it looks like Before running the Python script:
Begin
Name="Node_1"
NodePosX=10
NodePosY=100
LinkedTo=Node_3
LinkedTo=Node_5
End Object
Begin
Name="Node_2"
NodePosX=20
NodePosY=200
End Object
Begin
Name="Node_3"
NodePosX=30
NodePosY=300
End Object
Begin
Name="Node_4"
NodePosX=40
NodePosY=400
End Object
Begin
Name="Node_5"
NodePosX=50
NodePosY=500
End Object
This is how it should look like AFTER running the Python script:
Begin
Name="Node_1"
NodePosX=10
NodePosY=100
LinkedTo=Node_3
LinkedTo=Node_5
End Object
Begin
Name="Node_2"
NodePosX=20
NodePosY=200
End Object
Begin
Name="Node_3"
NodePosX=10
NodePosY=100
End Object
Begin
Name="Node_4"
NodePosX=40
NodePosY=400
End Object
Begin
Name="Node_5"
NodePosX=10
NodePosY=100
End Object
Do you think am asking to much from Python to do?
Any better suggestions for the title to this problem?
I hired a developer and this is the code they wrote
'''
By: Alex Reichenbach
'''
import re
# Line-level patterns for the node file format.
begin_regex = re.compile(r"Begin")
# Capture the complete node name (for example Node_3) so that it compares
# equal to the values found after LinkedTo= and is written back unchanged.
name_regex = r'(?<=Name=").*(?=")'
posX_regex = r"(?<=NodePosX=).*"
posY_regex = r"(?<=NodePosY=).*"
linkedTo_regex = r"(?<=LinkedTo=).*"
end_regex = re.compile(r"End Object")
## Reading the contents of the file
# The with-block closes the file as soon as its contents have been read.
with open("1-Example-Original.txt", "r") as source_file:
    text = source_file.read()
class Node:
    """One Begin ... End Object record: a name, a position and its links."""

    def __init__(self):
        self.name = ""
        self.nodePosX = 0
        self.nodePosY = 0
        self.linked_to = []

    def __str__(self):
        """Render the node in the same text layout used by the input file."""
        # One "LinkedTo=" line per link, each introduced by its own newline.
        link_lines = "".join("\nLinkedTo=" + target for target in self.linked_to)
        template = """Begin
Name="%s"
NodePosX=%s
NodePosY=%s%s
End Object
"""
        return template % (self.name, self.nodePosX, self.nodePosY, link_lines)
## Read the text into the node objects
nodes = []
current_node = None
for line in text.split('\n'):  ## Iterate through each line
    if begin_regex.match(line):  ## Begin
        # A "Begin" line opens a new node record.
        current_node = Node()
        nodes.append(current_node)
        continue

    name_hit = re.search(name_regex, line)
    if name_hit:  ## Name
        name = name_hit.group(0)
        current_node.name = name
        continue

    posX_hit = re.search(posX_regex, line)
    if posX_hit:  ## PosX
        posX = posX_hit.group(0)
        current_node.nodePosX = posX
        continue

    posY_hit = re.search(posY_regex, line)
    if posY_hit:  ## PosY
        posY = posY_hit.group(0)
        current_node.nodePosY = posY
        continue

    linked_hit = re.search(linkedTo_regex, line)
    if linked_hit:  ## LinkedTo
        name = linked_hit.group(0)
        current_node.linked_to.append(name)
## Copy the linked_to attributes
# Each node hands its position to every node at or after it in the list
# whose name appears among its LinkedTo entries.
for index, source in enumerate(nodes):
    for target in nodes[index:]:
        if target.name in source.linked_to:
            target.nodePosX = source.nodePosX
            target.nodePosY = source.nodePosY
## Print it all out
# Join once instead of growing the string node by node.
s = "".join(str(node) for node in nodes)
print(s)
## Write to File
# The with-block flushes and closes the output file deterministically.
with open("_edited.txt", "w") as edited_file:
    edited_file.write(s)

Python 2.7 - manipulate some data from a CSV file

First of all I wanna emphasize that I'm a total beginner at python, the below code I made to manipulate some data from a CSV. I know that it's not the prettiest code and probably I could have made it more elegant, but it works, until a certain point and that's the reason I opened this question
import csv
from numpy import interp
from operator import sub
import math
import pandas as pd
from Tkinter import *
import Tkinter as tk
import tkFileDialog as filedialog
# Create the Tk root window and hide it so that only the file dialog is shown.
root = Tk()
root.withdraw()
# Ask the user which CSV file to process.
filename= filedialog.askopenfilename( initialdir="C:/", title="select file", filetypes=(("CSV files", "*.CSV"), ("all files", "*.*")))
# Columns 0-3 of the selected CSV; per the sample data these are
# employee code, hour, minute and day.
id_uri = []
ore = []
minute = []
zile = []
# Column 0 of activi.csv (employee codes).
activi = []
# Working lists filled and reset by pontaj().
listx = []
listsa = []
# One sub-list per processed day: whole hours (list_ore) and minutes (list_min).
list_ore = []
listspi = []
listspf = []
list_min = []
zile_luna = 0
test = []
# Column 1 of activi.csv (employee names).
nume = []
with open (filename) as p, open ('activi.csv') as a:
reader = csv.reader(p,delimiter=',')
for row in reader:
id_uri.append(row[0])
ore.append(row[1])
minute.append(row[2])
zile.append(row[3])
reader = csv.reader(a)
for row in reader:
activi.append(row[0])
nume.append(row[1])
# Convert the text columns to numbers (Python 2: map() returns a list).
id_uri = map(int, id_uri)
ore = map(float, ore)
minute = map(float, minute)
# Rescale minutes from 0-60 to 0-100 and add them to the hours as a decimal fraction.
minute = interp(minute,[0,60],[0,100])
ore = ore + minute/100
zile = map(int, zile)
activi = map(int, activi)
# Number of distinct day values plus one; used as the exclusive upper bound of the day loop.
# NOTE(review): this assumes day numbers are contiguous from 1. With the sample
# days 1, 2 and 4 the loop would cover 1..3 and never reach day 4 - confirm.
zile_luna = len(set(zile))+1
# NOTE(review): `mimin` looks like a typo for `minim`, the name pontaj() declares global.
mimin = 0
maxim = 0
# For the day held in the module-level variable z, compute max(hour) - min(hour)
# per employee, then append the whole hours to list_ore and the minutes to list_min.
def pontaj():
global listx
global listsa
global listspi
global listspf
global list_ore
global list_min
global maxim
global minim
# Only the first three entries of activi are processed (hard-coded 3).
for x in range(3):
for y in range(len(id_uri)):
# z is not a parameter; it is read from the module-level loop variable.
if zile[y] == z:
if activi[x] == id_uri[y]:
listx.append(ore[y])
# NOTE(review): indentation was lost in this listing. If listx is empty here
# (an employee with no rows for day z), min()/max() would raise ValueError -
# confirm at which nesting level these lines sit.
minim = min(listx)
maxim = max(listx)
listsa.append(maxim-minim)
listx = []
# Split each duration into its integer part and its fractional part.
listspi = [int(i) for i in listsa]
listspf = [i%1 for i in listsa]
# Turn the fractional part back into minutes: round to 2 places, scale to 0-100,
# map 0-100 onto 0-60 and truncate to an integer.
for i in range(len(listspf)):
listspf[i] = round(listspf[i], 2)
listspf[i] = listspf[i]*100
listspf[i] = interp(listspf[i],[0,100],[0,60])
listspf[i] = int(listspf[i])
list_ore.append(listspi)
list_min.append(listspf)
listsa = []
# Run pontaj() once for every z from 1 up to zile_luna - 1.
for z in range(1,zile_luna):
pontaj()
# Convert every stored hour and minute value to a string.
for sublst in list_ore:
for item in range(len(sublst)):
sublst[item] = str(sublst[item])
for sublst in list_min:
for item in range(len(sublst)):
sublst[item] = str(sublst[item])
# Combine hours and minutes into one text value per cell.
# NOTE(review): zip() here pairs the individual characters of the two strings,
# so "12" and "30" would become "1:3 2:0" rather than "12:30" - confirm intent.
for i in range(len(list_ore)):
for j in range(len(list_ore[i])):
list_ore[i][j] = ' '.join(i + ':' + j for i,j in zip(list_ore[i][j],list_min[i][j]))
# One row per pontaj() call, transposed so that each row holds one employee's values.
df = pd.DataFrame(list_ore)
df = df.T
# Append the names from activi.csv as the last column, labelled 'e'.
nume = pd.Series(nume)
df['e'] = nume.values
# Write the table without index or header, then print it (Python 2 print statement).
df.to_csv('pontaj.csv', index = False, header = False)
print df
and the CSV file I read all the info from looks like this(employee code, hour, minute, day):
23,5,00,1
23,6,00,1
24,7,00,1
25,8,00,1
24,9,00,1
25,11,00,1
24,7,00,2
25,8,00,2
24,9,00,2
25,11,00,2
23,5,00,4
23,6,00,4
24,7,00,4
25,8,00,4
24,9,00,4
25,11,00,4
I have another CSV file that has employee code folowed by employee name like this:
23,aqwe
24,beww
25,cwww
Basically it's an attendance logger, it compares info from one CSV to another, finds the min and max hours in a certain day, subtracts min from max and writes this info in a list that is written to another csv.
Thing is, if all employees attend a certain day, all goes well, it calculates the attendance hours, puts them in the csv, all good. But what will happen if an employee skips one day? well as I found out, it ruins the calculation, because the code requires that all data must be consistent and in a perfect order.
The data written to the CSV file must finally look like this:
day1 day2 day3
hours hours hours employee_a
hours hours hours employee_b
hours hours hours employee_c
But if one skips a day, the hours get scrambled.
I've tried some different approaches but none worked, and I realize the problem is due to my simple way of thinking, but as I said, I only started with python a few days ago.
Do you have any suggestions on how I could improve the code to take the missed day of a certain employee in consideration and generate the data like so:
day1 day2 day3
1:20 2:30 3:40 employee_a
1:20 2:30 3:40 employee_b
0:0 2:30 3:40 employee_c
Any advice would be appreciated, thanks!

Elasticsearch scroll upper limit - python api

Is there a way using the python api to set an upper limit to the number of documents that are retrieved if we scroll in chunks of a specific size. So let's say I want a maximum of 100K documents being scrolled in chunks of 2K, where there are over 10Mil documents available.
I've implemented a counter like object but I want to know if there is a more natural solution.
# Query using a random_score function with a fixed seed.
es_query = {"query": {"function_score": {"functions": [{"random_score": {"seed": "1234"}}]}}}
es = Elasticsearch(ADDRESS, port=PORT)
# Initial search: requests pages of 2000 hits and a scroll with a "1m" timeout.
result = es.search(
index="INDEX",
doc_type="DOC_TYPE",
body=es_query,
size=2000,
scroll="1m")
# Collect the values of interest from the first page.
data = []
for hit in result["hits"]["hits"]:
for d in hit["_source"]["attributes"]["data_of_interest"]:
data.append(d)
do_something(*args)
scroll_id = result['_scroll_id']
# Starts as the reported total; inside the loop it becomes the size of the latest page.
scroll_size = result["hits"]["total"]
i = 0
# Keep fetching pages until a page comes back empty.
while(scroll_size>0):
if i % 10000 == 0:
print("Scrolling ({})...".format(i))
result = es.scroll(scroll_id=scroll_id, scroll="1m")
scroll_id = result["_scroll_id"]
scroll_size = len(result['hits']['hits'])
# data is reset for every page.
data = []
for hit in result["hits"]["hits"]:
for d in hit["_source"]["attributes"]["data_of_interest"]:
data.append(d)
do_something(*args)
# NOTE(review): indentation was lost in this listing, so it is unclear whether
# `i += 1` and the `break` sit in the hit loop or directly in the while loop.
# If the `break` is inside the `for`, it does not leave the `while` - confirm.
i += 1
if i == 100000:
break
In my opinion, if you only want the first 100K documents you should narrow your query in the first place. That will speed up your process. You can add a filter on date, for example.
Regarding the code, I do not know of any way other than using a counter. I would just correct the indentation and fold the if statement into the loop condition for readability.
# Query using a random_score function with a fixed seed.
es_query = {"query": {"function_score": {"functions": [{"random_score": {"seed": "1234"}}]}}}
es = Elasticsearch(ADDRESS, port=PORT)
# Upper limit on the number of documents to retrieve across all pages.
max_docs = 100000
result = es.search(
    index="INDEX",
    doc_type="DOC_TYPE",
    body=es_query,
    size=2000,
    scroll="1m")
data = []
for hit in result["hits"]["hits"]:
    for d in hit["_source"]["attributes"]["data_of_interest"]:
        data.append(d)
do_something(*args)
scroll_id = result['_scroll_id']
# Track the size of the latest page; an empty page means the scroll is exhausted.
scroll_size = len(result["hits"]["hits"])
# i counts documents retrieved so far, including the first page.
i = scroll_size
# `and` is required here: `&` binds tighter than the comparisons, so
# `scroll_size > 0 & i < 100000` never looks at the value of i.
while scroll_size > 0 and i < max_docs:
    print("Scrolling ({})...".format(i))
    result = es.scroll(scroll_id=scroll_id, scroll="1m")
    scroll_id = result["_scroll_id"]
    scroll_size = len(result['hits']['hits'])
    for hit in result["hits"]["hits"]:
        for d in hit["_source"]["attributes"]["data_of_interest"]:
            data.append(d)
    do_something(*args)
    # Python has no ++ operator; advance by the number of hits just read.
    i += scroll_size

Python unified diff with line numbers from both "files"

I'm trying to figure out a way to create unified diffs with line numbers only showing N lines of context. I have been unable to do this with difflib.unified_diff. I need to show changes in both files.
The closest I can come is using diff on the command line like so:
/usr/bin/diff
--unchanged-line-format=' %.2dn %L'
--old-line-format="-%.2dn %L"
--new-line-format="+%.2dn %L"
file1.py
file2.py
BUT I only want to show N lines of context, and /usr/bin/diff doesn't seem to support context with a custom line format (eg. -U2 is not compatible with --line-format "conflicting output style options").
Below is an example of what I'd like to accomplish (the same output as the above diff, but only showing 1 line of context surrounding changes):
+01: def renamed_function()
-01: def original_function():
02:
+03: """ Neat stuff here """
04:
21:
+22: # Here's a new comment
23:
85: # Output the value of foo()
+86: print "Foo is %s"%(foo())
-86: print foo()
87:
I was able to figure out something very close to what I wanted to do. It's slower than regular diff, though. Here's the entire code, from my project GitGate.
def _read_lines(file_path):
    """Return the lines of ``file_path``, or an empty list if it does not exist."""
    if not os.path.exists(file_path):
        return []
    with open(file_path, 'r') as handle:
        return handle.readlines()


def unified_diff(to_file_path, from_file_path, context=1):
    """Return a line-numbered unified diff between two files.

    The lines of ``to_file_path`` are diffed against those of
    ``from_file_path`` with ``context`` unchanged lines around each change.
    A missing file is treated as empty.

    The result is a list of strings. Each hunk starts with a
    ``"## <left> <right> ##\\n"`` header; every following line has the form
    ``"<code><line number>: <text>"`` where ``code`` is ``-``, ``+`` or a
    space. ``-`` lines carry the line number in ``to_file_path``; all other
    lines carry the line number in ``from_file_path``.
    """
    # difflib writes hunk headers as "@@ -start,count +start,count @@" and
    # omits ",count" when the count is 1.
    hunk_header = re.compile(
        r'@@ ([-+][0-9]+(?:,[0-9]+)?) ([-+][0-9]+(?:,[0-9]+)?) @@')

    from_lines = _read_lines(from_file_path)
    to_lines = _read_lines(to_file_path)

    diff_lines = []
    to_lnum = 0
    from_lnum = 0
    lines = difflib.unified_diff(to_lines, from_lines, n=context)
    for index, line in enumerate(lines):
        # The first two lines are the '---' / '+++' file headers. Skipping
        # them by position keeps content lines that begin with '--' or '++'.
        if index < 2:
            continue
        m = hunk_header.match(line)
        if m:
            left = m.group(1)
            right = m.group(2)
            # Drop the leading sign and any ",count" to get the start line.
            to_lnum = int(left.split(',')[0][1:])
            from_lnum = int(right.split(',')[0][1:])
            diff_lines.append("## %s %s ##\n" % (left, right))
            continue
        code = line[0]
        lnum = to_lnum if code == '-' else from_lnum
        diff_lines.append("%s%.4d: %s" % (code, lnum, line[1:]))
        if code == '-':
            to_lnum += 1
        elif code == '+':
            from_lnum += 1
        else:
            to_lnum += 1
            from_lnum += 1
    return diff_lines

Categories

Resources