I'm trying to do an academic project (I cannot use any of the libraries from python) to construct a graph data structure from teh data in a csv file.
First I'm reading the csv file and put the information into a dictionary, using:
def github_csv():
with open('Github1.csv', 'r') as csv_file:
data = csv.DictReader(csv_file)
next(data)
for row in data:
print(row)
The output is:
{'follower': '9236', 'followed': '1570'}
{'follower': '13256', 'followed': '9236'}
{'follower': '9236', 'followed': '13256'}
My first doubt is there any way to put it like this:
'9236': ['1570', '13256']
'13256': ['9236']
Then how can I assign the key value to a vertex and corresponding values for another vertex and then create the edges?
My graph class is:
class Graph:
def __init__(self, directed=False):
self._directed = directed
self._number = 0
self._vertices = {}
def insert_vertex(self, x):
v = Vertex(x)
self._vertices[v] = {}
self._number = len(self._vertices)
return v
def insert_edge(self, u, v, x=None):
e = Edge(u, v, x)
self._vertices[u][v] = e
self._vertices[v][u] = e
EDIT:
class Vertex:
__slots__ = "_element"
def __init__(self, x):
self._element = x
def vertice(self):
return self._element
def __eq__(self, other):
if isinstance(other, Vertex):
return self._element == other.vertice()
return False
def __repr__(self):
return '{0}'.format(self._element)
def __hash__(self):
return hash(id(self))
class Edge:
__slots__ = '_origin', '_destination', '_weight'
def __init__(self, u, v, p=None):
self._origin = u
self._destination = v
self._weight = p
def __hash__(self):
return hash((self._origin, self._destination))
def __repr__(self):
if self._weight is None:
return '({0}, {1})'.format(self._origin, self._destination)
return '({0}, {1}, {2})'.format(self._origin, self._destination, self._weight)
def endpoints(self):
return self._origin, self._destination
def opposite(self, v):
return self._origin if v is self._destination else self._origin
def cost(self):
return self._weight
def show_edge(self):
print('(', self._origin, ', ', self._destination, ') com peso', self._weight)
Regarding the first question:
lista = [{'follower': '9236', 'followed': '1570'},
{'follower': '13256', 'followed': '9236'},
{'follower': '9236', 'followed': '13256'}]
rel_dict = {}
for d in lista:
if d["follower"] in rel_dict.keys():
rel_dict[d["follower"]].append(d["followed"])
else:
rel_dict[d["follower"]] = [d["followed"]]
rel_dict:
{'9236': ['1570', '13256'], '13256': ['9236']}
EDIT2 (with Vertex and Edge definition):
To add these data to the graph:
graph = Graph()
for k,v in rel_dict.items():
k_vertex = graph.insert_vertex(k)
for v_item in v:
v_item_vertex = graph.insert_vertex(v_item)
graph.insert_edge(k_vertex, v_item_vertex)
Assuming you get row as the dictionaries you've mentioned,
edges_dict = {}
def github_csv():
with open('Github1.csv', 'r') as csv_file:
data = csv.DictReader(csv_file)
next(data)
for row in data:
edges_dict[ row['follower'] ].append(row['followed'])
This should give you an edges_dict dictionary of lists as required.
Related
Update
Thanks to the comments of some community members, I realize that there are some similar problems, but they may a bit different, please allow me to explain it further.
I actually hope to use the same method in a real problem, So briefly:
Reuse of edges in differernt path is completely allowed
a unique(or a new) path from A to B is defined as a collection of vertices that have any different vertices.
Let me use a quiz from Python data structure and algorithm analysis by Bradley .N Miller and David L. Ranum to expain my qusetion.
Quesion:
Consider the task of converting the word FOOL to SAGE, also called word ladder problem. In solving
In the word ladder problem, only one letter must be replaced at a time, and the result of each step must be a word, not non-existent.
Input:
FOUL
FOOL
FOIL
FAIL
COOL
FALL
POOL
PALL
POLL
POLE
PALE
PAGE
SALE
POPE
POPE
SAGE
We can easily find the path from FOOL to SAGE, as Bradley showed:
enter image description here
and I used Breadth First Search (BFS) to solve probem:
class Vertex:
def __init__(self, key, value = None):
self.id = key
self.connectedTo = {}
self.color = 'white'
self.dist = sys.maxsize
self.pred = []
self.disc = 0
self.fin = 0
self.value = value,
#self.GraphBulided = False
self.traverseIndex = 0
self.predNum = 0
def addNeighbor(self, nbr, weight=0):
self.connectedTo[nbr] = weight
def __str__(self):
return '{} connectedTo: {}'.format(self.id, \
str([x.id for x in self.connectedTo]))
def setColor(self, color):
self.color = color
def setDistance(self, d):
self.dist = d
#I want store all Pred for next traverse so I use a list to do it
def setPred(self, p, list = False):
if not list:
self.pred = p
else:
self.pred.append(p)
self.predNum += 1
def setDiscovery(self,dtime):
self.disc = dtime
def setFinish(self,ftime):
self.fin = ftime
#def setGraphBulided(self, tag = True):
# self.GraphBulided = tag
def getFinish(self):
return self.fin
def getDiscovery(self):
return self.disc
def getPred(self):
if isinstance(self.pred, list):
if self.traverseIndex < self.predNum:
return self.pred[self.traverseIndex]
else:
return self.pred[-1]
else:
return self.pred
def __hash__(self):
return hash(self.id)
def getPredById(self):
if self.traverseIndex < self.predNum and isinstance(self.pred, list):
pred = self.pred[self.traverseIndex]
self.traverseIndex += 1
print("vertix {}: {} of {} preds".format(self.id, self.traverseIndex, self.predNum))
return [pred, self.traverseIndex]
else:
pred = None
return [pred, None]
def getCurrPredStaus(self):
#if not self.pred:
# return None
return self.predNum - self.traverseIndex
def getDistance(self):
return self.dist
def getColor(self):
return self.color
def getConnections(self):
return self.connectedTo.keys()
def getId(self):
return self.id
def getWeight(self, nbr):
return self.connectedTo[nbr]
def getValue(self):
return self.value
def findPath(self, dest):
pass
class Graph:
def __init__(self):
self.vertList = {}
self.numVertics = 0
self.verticsInSerach = set()
self.GraphBulided = False
def addVertex(self, key, value = None):
self.numVertics = self.numVertics + 1
newVertex = Vertex(key, value=value)
self.vertList[key] = newVertex
return newVertex
def getVertex(self, n):
if n in self.vertList:
return self.vertList[n]
else:
return None
def __contains__(self, n):
return n in self.vertList
def addEdge(self, f, t, cost = 0, fvalue = None, tvalue = None):
if f not in self.vertList:
nv = self.addVertex(f, fvalue)
if t not in self.vertList:
nv = self.addVertex(t, tvalue)
self.vertList[f].addNeighbor(self.vertList[t], cost)
def setGraphBulided(self, tag = True):
self.GraphBulided = tag
def getVertices(self):
return self.vertList.keys()
def setGraphBulided(self, tag = True):
self.GraphBulided = tag
def setSerachedVertixs(self, vertix):
self.verticsInSerach.add(vertix)
def getGraphBulided(self):
return self.GraphBulided
def getSerachedVertixs(self):
return self.verticsInSerach
def __iter__(self):
return iter(self.vertList.values())
def __hash__(self):
hashIds = [x for x in self.getVertices()]
if len(hashIds) > 0 and hashIds[0]:
return hash(', '.join(hashIds))
else:
return None
Here are some additional functions for building graphs
def buildGraph(wordFile, DFSgraph = False):
d = {}
g = Graph()
if DFSgraph:
g = DFSGraph()
wfile = open(wordFile)
for line in wfile:
word = line[:-1]
for i in range(len(word)):
bucket = word[:i] + '_' + word[i+1:]
if bucket in d:
d[bucket].append(word)
else:
d[bucket] = [word]
for bucket in d.keys():
for word1 in d[bucket]:
for word2 in d[bucket]:
if word1 != word2:
g.addEdge(word1, word2)
wfile.close()
return g
class Queue:
def __init__(self):
self.items = []
def isEmpty(self):
return self.items == []
def enqueue(self, item):
self.items.insert(0,item)
def dequeue(self):
return self.items.pop()
def size(self):
return len(self.items)
def bfs(g, start, listpred = False):
start.setDistance(0)
start.setPred(None)
vertQueue = Queue()
vertQueue.enqueue(start)
while (vertQueue.size() > 0):
currentVert = vertQueue.dequeue()
if currentVert.getConnections():
g.setSerachedVertixs(currentVert)
for nbr in currentVert.getConnections():
#print('sreach {}'.format(currentVert.getId()))
if (nbr.getColor() == 'white' or nbr.getColor() == 'gray'):
nbr.setColor('gray')
nbr.setDistance(currentVert.getDistance() + 1)
if nbr.predNum > 0 and currentVert.getId() not in [x.getId() for x in nbr.pred]:
nbr.setPred(currentVert, listpred)
elif nbr.predNum == 0:
nbr.setPred(currentVert, listpred)
vertQueue.enqueue(nbr)
currentVert.setColor('black')
Therefore, we can easily find the shortest path we need (If we only store one pred for one vertix).
wordGraph = buildGraph('fourletterwords1.txt', DFSgraph=False)
bfs(wordGraph, wordGraph.getVertex('FOOL'), listpred=True)
def traverse(y):
x=y
while(x.getPred()):
print(x.getPred())
x = x.getPred()
print(x.getId())
traverse(wordGraph.getVertex('SAGE'))
However, I still don't know how to trace all the paths correctly, can you give me some suggestions?
FIND path from src to dst ( Dijkstra algorithm )
ADD path to list of paths
LOOP P over list of paths
LOOP V over vertices in P
IF V == src OR V == dst
CONTINUE to next V
COPY graph to working graph
REMOVE V from working graph
FIND path from src to dst in working graph( Dijkstra algorithm )
IF path found
IF path not in list of paths
ADD path to list of paths
By using this code I'm able to hash only 1 record without any errors or warnings. How can I hash a hundred thousand records taken as input from the CSV file?
import pandas as pd
proper = []
with open("C:\\Users\\krupa\\Downloads\\proper.csv","r") as f:
for line in f:
tokens = line.split(',')
order_id =tokens[0]
country = tokens[1]
proper.append([order_id,country])
#print(proper)
proper = {}
with open("C:\\Users\\krupa\\Downloads\\proper.csv","r") as f:
for line in f:
tokens = line.split(',')
order_id =tokens[0]
country = tokens[1]
proper[order_id] = country
#print(proper)
def get_hash(key):
key = int(key, base=10)
hash_key = 0
for i in range(key):
hash_key += 1
return hash_key % 100
get_hash('503618705')
class HashTable:
def __init__(self):
self.MAX = 100
self.arr = [None for i in range(self.MAX)]
def get_hash(self, key):
key = int(key, base=10)
hash_key = 0
for i in range(key):
hash_key += 1
return hash_key % self.MAX
def __getitem__(self, index):
h = self.get_hash(index)
return self.arr[h]
def __setitem__(self, key, val):
h = self.get_hash(key)
self.arr[h] = val
def __delitem__(self, key):
h = self.get_hash(key)
self.arr[h] = None
t = HashTable()
t["503618705"] = "Tanzania"
t.arr
print(t.arr)
The code is free of errors but I want to hash all the records in the CSV File
What you need to do, is acctually use your methods.
right now you __init__ a new object t. Then you are refering to a index in your list and set "Tanzania" as value. så you actually dont use your methods in your object t only the function list, so what you can do is (I hope I understanded your question correctly.):
lst = ['Sweden', 'Germany', 'Pakistan', 'Syria', 'Norway']
idx = ["1234", "30", "500", "2034", "443"]
def get_hash(key):
key = int(key, base=10)
hash_key = 0
for i in range(key):
hash_key += 1
return hash_key % 100
get_hash('50')
class HashTable:
def __init__(self):
self.MAX = 100
self.arr = [None for i in range(self.MAX)]
def get_hash(self, key):
key = int(key, base=10)
hash_key = 0
for i in range(key):
hash_key += 1
return hash_key % self.MAX
def __getitem__(self, index):
h = self.get_hash(index)
return self.arr[h]
def __setitem__(self, key, val):
h = self.get_hash(key)
self.arr[h] = val
def __delitem__(self, key):
h = self.get_hash(key)
self.arr[h] = None
t = HashTable()
for n, i in enumerate(lst):
t.__setitem__(idx[n], lst[n])
for i in range(len(idx)):
print(t.__getitem__(idx[i]))
print(t.get_hash(idx[i]))
So I'm trying to go through a book used to teach different types of models and algorithms and I came upon this issue. My code is below. Essentially it generates a text output of a graph. Individual points are referred to as "vertices" and the lines connecting them are "edges". What I'm currently trying to do is check if an edge exists between two vertices.
class Graph(dict):
def __init__(self, vs=[], es=[]):
for v in vs:
self.add_vertex(v)
for e in es:
self.add_edge(e)
def add_vertex(self, v):
self[v] = {}
def add_edge(self, e):
v,w = e
self[v][w] = e
self[w][v] = e
def get_edge(self, v, w):
g = dict(self)
keys = g.keys()
return type(keys[0])
def remove_edge(self, e):
pass
def vertices(self):
keys = self.keys()
return keys
class Vertex(object):
def __init__(self, label=''):
self.label = label
def __repr__(self):
return 'Vertex(%s)' % repr(self.label)
__str__ = __repr__
class Edge(tuple):
def __new__(cls, e1, e2):
return tuple.__new__(cls, (e1, e2))
def __repr__(self):
return 'Edge(%s, %s)' % (repr(self[0]), repr(self[1]))
__str__ = __repr__
v = Vertex('v')
w = Vertex('w')
e = Edge(v, w)
print e
g = Graph([v,w],[e])
print g
edge = Graph.get_edge(g, 'v', 'w')
print edge
The issue is here:
def get_edge(self, v, w):
g = dict(self)
keys = g.keys()
return type(keys[0])
I cannot access the values in the dictionary because I can't use the keys, the return type line shows why:
Output:
Edge(Vertex('v'), Vertex('w'))
{Vertex('v'): {Vertex('w'): Edge(Vertex('v'), Vertex('w'))}, Vertex('w'):{Vertex('v'): Edge(Vertex('v'), Vertex('w'))}}
<class '__main__.Vertex'>
The issue is the keys aren't strings, integers, or really anything I can reference, they're generated from calling the Vertex class. Is there some way I could reference the keys that I'm missing? My goal is to have the method return the Edge requested if it exists.
I'm looking for a convenient way to remove an instance of a class object which is contained in two dictionaries. When I delete the object instance from one dict, I should automatically be deleted in the second dict. Is this anyhow possible?
class node():
def __init__(self, vID):
self.vID = vID
def __hash__(self):
return hash(self.vID)
def __eq__(self, other):
return self.vID == other.vID
class structure():
def __init__(self):
self.point_index_a = {}
self.point_index_b = {}
def add_point(self,x,y):
x_object = node(x)
self.point_index_a[x_object] = None
self.point_index_b[x_object] = None
def a_iter(self):
for k,v in self.point_index_a.iteritems():
print k,v
def b_iter(self):
for k,v in self.point_index_b.iteritems():
print k,v
mg = structure()
mg.add_point(1, 8)
mg.add_point(3, 4)
# point index a
for k,v in mg.point_index_a.iteritems():
print k,v
# point index b
for k,v in mg.point_index_b.iteritems():
print k,v
to_del = mg.point_index_a.iterkeys().next()
del to_del
# point index a, object to_del still exists in both dicts
for k,v in mg.point_index_a.iteritems():
print k,v
# point index b
for k,v in mg.point_index_b.iteritems():
print k,v
I would implement as follows:
class structure():
...
def remove(self, point):
del self.point_index_a[point]
del self.point_index_b[point]
I am trying to create a class that will be able to make all possible calculations between matrix. the input is a matrix (doesn't matter the size), and on that matrix the class should do any calculations - either multiply, combine or substract. The different functions are as follow:
I need to have in this class:
__init__(self, data)
get_width(self)
get_height(self)
add(self, m)
scalar_multiply(self, a)
subtract(self, m)
multiply(self, m)
compare(self, m)
Thanks alot, it also needs to be with simple actions.
This is my code till now:
class Matrix:
def __init__(self, data):
self.data = data
def get_width(self):
return len(self.data[0])
def get_height(self):
return len(self.data)
def add(self, m):
lines = []
for j in range(len(self.data)):
line = []
for i in range(len(self.data[j])):
line.append(self.data[j][i] + m[j][i])
lines.append(line)
return lines
def scalar_multiply(self, a):
res = []
for j in range(len(self.data)):
line = []
for i in range(len(self.data)):
line.append(self.data[j][i]*a)
res.append(line)
return res
def subtract(self, m):
lines = []
for j in range(len(self.data)):
line = []
for i in range(len(self.data)):
line.append(self.data[j][i] - m[j][i])
lines.append(line)
return lines
def multiply(self, m):
lines = []
for j in range(len(self.data-1)):
line = []
for i in range(len(m[0])-1):
schum = 0
for k in range(len(self.data[0]-1)):
schum = self.data[j][k]*m[k][i]
line.append(schum)
lines.append(line)
return lines
def compare(self, m):
for j in range(len(self.data)):
for i in range(len(self.data)[j]):
while self.data[j][i] == m[j][i]:
return True
else:
return False
Do you need to create this yourself? If not, there is already an excellent implementation of this in numpy