Add column data to ASE Atom Object json File - python

I am trying to add column data (e.g. temperature, molarity, etc.) to an ASE Atom object stored in a .json file, which is formatted as follows:
{"1": {
"cell": {"__ndarray__": [[3, 3], "float64", [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]},
"ctime": 23.062761176728078,
"mtime": 23.062761176728078,
"numbers": {"__ndarray__": [[63], "int32", [35, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]},
"pbc": {"__ndarray__": [[3], "bool", [false, false, false]]},
"positions": {"__ndarray__": [[63, 3], "float64", [1.02088, -0.07049, -0.03212, 1.0973, -3.2788, 3.87728, 2.61528, -3.29081, 3.8975, 3.15998, -3.00975, 2.50308, 4.67887, -3.0308, 2.49295, 5.20986, -2.78486, 1.07948, 6.73103, -2.81918, 1.01952, 7.26742, -4.1725, 1.4482, 8.77632, -4.17848, 1.43408, 9.26424, -5.51692, 1.94735, 10.77335, -5.50232, 1.97542, 11.28332, -6.82712, 2.48445, 12.80015, -6.79016, 2.50407, 13.29845, -8.13591, 2.97445, 14.81673, -8.19203, 2.94905, 15.31473, -9.54583, 3.43333, 14.81153, -10.67945, 2.53755, 15.25421, -12.06622, 3.00288, 16.77287, -12.17072, 3.02548, 14.67692, -12.39547, 4.37401, 14.70058, -13.08165, 1.99749, 0.6998, -3.46574, 4.87783, 0.71285, -4.05308, 3.20454, 0.7212, -2.31013, 3.5324, 2.967, -4.26501, 4.25235, 2.97929, -2.53549, 4.60251, 2.81017, -2.03287, 2.15577, 2.78347, -3.76456, 1.80124, 4.99943, -4.00574, 2.86952, 5.07838, -2.27055, 3.17637, 4.86624, -1.80795, 0.71974, 4.80511, -3.54038, 0.39431, 7.14617, -2.02959, 1.65792, 7.05652, -2.60496, -0.00639, 6.87826, -4.96031, 0.78846, 6.94637, -4.41838, 2.46536, 9.15862, -3.37521, 2.07596, 9.15115, -3.99773, 0.42005, 8.89989, -6.32829, 1.30471, 8.87374, -5.70588, 2.95654, 11.12913, -4.69141, 2.62325, 11.16301, -5.3113, 0.96742, 10.92897, -7.63817, 1.83648, 10.89587, -7.0253, 3.4914, 13.15552, -5.99488, 3.16603, 13.17901, -6.58063, 1.49654, 12.87249, -8.89368, 2.31322, 12.93068, -8.34259, 3.98611, 15.22396, -7.40473, 3.5943, 15.18383, -7.99669, 1.93376, 14.98703, -9.68396, 4.46777, 16.4104, -9.51576, 3.43886, 15.16831, -10.54649, 1.51163, 13.71549, -10.72486, 2.51819, 17.16035, -11.53426, 3.81916, 17.15315, -11.86178, 2.05054, 17.04038, -13.21533, 3.22842, 15.13502, -11.7425, 5.12017, 14.91931, -13.43378, 4.61043, 13.59434, -12.25198, 4.34169, 15.10052, -12.84284, 1.00836, 13.60916, -13.00919, 1.99737, 15.01561, -14.08621, 2.30661]]},
"unique_id": "8fc6b41faacf669e07523ea9932aae59",
"user": null},
"ids": [1],
"nextid": 2}
So far I have tried adding an additional node to the positions graph, adding a 4th dimension to the positions graph, incrementing one axis of the positions graph by the column value, adding a new key to the json file, and adding the value within the built-in "tags" key. All of these attempts have either had no effect or raised an error that cannot be resolved without editing the ASE library. I am not sure what to try next, or whether I have missed something obvious.
Full Code:
https://github.com/nfurth1/MatDeepLearn

Related

Remove font's shadow in Sankey

Is it possible to remove the white shadow of the font in the following sankey diagram?
import plotly.graph_objects as go
fig = go.Figure(go.Sankey(
arrangement = "snap",
node = {
"label": ["A", "B", "C", "D", "E", "F"],
"x": [0.2, 0.1, 0.5, 0.7, 0.3, 0.5],
"y": [0.7, 0.5, 0.2, 0.4, 0.2, 0.3],
'pad':10}, # 10 Pixels
link = {
"source": [0, 0, 1, 2, 5, 4, 3, 5],
"target": [5, 3, 4, 3, 0, 2, 2, 3],
"value": [1, 2, 1, 1, 1, 1, 1, 2]}))
fig.show()
It does not seem to be possible. You can edit some text attributes through fig['data'][0]['textfont'], like:
sankey.Textfont({
'color': '#2a3f5f', 'family': '"Open Sans", verdana, arial, sans-serif', 'size': 10
})
And as you can see, sankey.Textfont has no attribute that can edit the properties of the "shadow". I've tried setting other values for 'family', but the shadow persists no matter what. Another peculiar detail is that the color can't be changed directly either; only 'size' and 'family' can.

How can I multiply list items in a dict with another list in Python

I have a dictionary with player names and their points, and what I need to do is multiply each list item with coefficients from another list resulting in a new array with multiplied points:
points = {mark : [650, 400, 221, 0, 3], bob : ([240, 300, 5, 0, 0], [590, 333, 20, 30, 0]), james : [789, 201, 0, 0, 1]}
coefficients = [5, 4, 3, 2, 1]
So for example for Mark:
player_points = [650*5, 400*4, 221*3, 0*2, 3*1]
And for Bob:
player_points = [240*5, 300*4, 5*3, 0*2, 0*1], [590*5, 333*4, 20*3, 30*2, 0*1]
What I tried was the following but it didn't work whatsoever:
def calculate_points(points, coefficients):
i = 0
for coefficient in coefficients:
player_points = coefficient * points[i]
i += 1
return player_points
def main():
points = {"mark": [650, 400, 221, 0, 3],
"bob": ([240, 300, 5, 0, 0], [590, 333, 20, 30, 0]),
"james": [789, 201, 0, 0, 1]}
coefficients = [5, 4, 3, 2, 1]
player_points = calculate_points(points, coefficients)
print(player_points)
main()
For list multiplication you can do:
# Multiply Mark's scores element-wise with the coefficients at the same positions.
player_point = [i * j for i, j in zip(points['mark'], coefficients)]
So if you want a player_points dictionary:
# Build a mapping of player name -> scores weighted by the coefficients.
player_points = {}
for name in points:
    # NOTE: this only suits values that are flat lists of numbers. A value that
    # is a tuple of lists would have each inner list paired with a coefficient,
    # and list * int repeats the list instead of multiplying its items.
    player_points[name] = [i * j for i, j in zip(points[name], coefficients)]
Here is code that works using a for loop:
# Scores per player; every value here is a flat list of numbers.
points = {
    "mark": [650, 400, 221, 0, 3],
    "bob": [240, 300, 5, 0, 0],
    "joe": [590, 333, 20, 30, 0],
    "james": [789, 201, 0, 0, 1],
}
coefficients = [5, 4, 3, 2, 1]

for scores in points.values():
    # Pair each score with the coefficient at the same position and multiply.
    player_points = [
        score * coefficient for score, coefficient in zip(scores, coefficients)
    ]
    # One output line per player, in dictionary order.
    print(player_points)
This will give the following output:
[3250, 1600, 663, 0, 3]
[1200, 1200, 15, 0, 0]
[2950, 1332, 60, 60, 0]
[3945, 804, 0, 0, 1]
Your data structure is irregular, which makes processing it much harder than it needs to be. If all the dictionary values were tuples, a simple dictionary comprehension could be used. As it is, you sometimes have a list and sometimes a tuple of lists, which requires the code to deal with exceptions and type detection.
Here's how it would work if the structure was consistent (i.e. tuples for all values)
# Every player maps to a tuple of score lists, even when there is only one list.
points = {
    "mark": ([650, 400, 221, 0, 3],),
    "bob": ([240, 300, 5, 0, 0], [590, 333, 20, 30, 0]),
    "james": ([789, 201, 0, 0, 1],),
}
coefficients = [5, 4, 3, 2, 1]

# Weight each score list position by position, keeping the tuple-of-lists
# shape of the input for every player.
player_points = {}
for player, score_lists in points.items():
    weighted = []
    for scores in score_lists:
        weighted.append(
            [score * weight for score, weight in zip(scores, coefficients)]
        )
    player_points[player] = tuple(weighted)
print(player_points)
{
'mark' : ([3250, 1600, 663, 0, 3],),
'bob' : ([1200, 1200, 15, 0, 0], [2950, 1332, 60, 60, 0]),
'james': ([3945, 804, 0, 0, 1],)
}
If you don't want to adjust your structure, you'll need a function that handles the inconsistency:
# Irregular structure: a value is either one list of scores or a tuple of lists.
points = {
    "mark": [650, 400, 221, 0, 3],
    "bob": ([240, 300, 5, 0, 0], [590, 333, 20, 30, 0]),
    "james": [789, 201, 0, 0, 1],
}
coefficients = [5, 4, 3, 2, 1]


def applyCoeffs(pts, coeffs):
    """Multiply scores by coefficients, preserving the shape of ``pts``.

    Args:
        pts: Either a list of numbers, or an iterable (e.g. a tuple) whose
            items are themselves valid ``pts`` values.
        coeffs: Coefficients paired position by position with each list.

    Returns:
        A new list of products when ``pts`` is a list; otherwise a tuple with
        the coefficients applied to every item of ``pts``.
    """
    if isinstance(pts, list):
        # zip stops at the shorter of the two sequences.
        return [p * c for p, c in zip(pts, coeffs)]
    # Anything that is not a list is treated as a container of score lists.
    return tuple(applyCoeffs(pt, coeffs) for pt in pts)


player_points = {pl: applyCoeffs(pts, coefficients) for pl, pts in points.items()}
print(player_points)
{
'mark' : [3250, 1600, 663, 0, 3],
'bob' : ([1200, 1200, 15, 0, 0], [2950, 1332, 60, 60, 0]),
'james': [3945, 804, 0, 0, 1]
}

Python - Problem to paint different colours in x,y and z axis with Ipyvolume library

I'm trying to create a 3D plot with the Python library "ipyvolume" where every point in the plot has a colour. Several points may share the same colour. There is a problem when it paints the points in the plot. Any ideas on how to fix this?
Import the libraries:
import pandas as pd
import numpy as np
import ipyvolume as ipv
Load the data:
dataframe = pd.read_csv("C:/Users/j/Desktop/K - Means/test.csv",sep=",")
dataframe.head()
Picture about Dataframe:
Dataframe
Creation of the axes:
X = np.array(dataframe[["op","ex","ag"]])
y = np.array(dataframe['categoria'])
Information about X:
array([[34.297953, 41.948819, 29.370315],
[44.986842, 37.938947, 24.279098],
[41.733854, 38.999896, 34.645521],
[40.377154, 52.337538, 31.082154],
[36.664677, 48.530806, 31.138871],
[33.531771, 43.211667, 25.786667],
[31.851102, 47.182362, 19.594331],
[31.865118, 55.377559, 36.258346],
[46.393488, 39.93031 , 16.658062],
[39.436667, 32.966288, 32.291591],
[52.750992, 41.698855, 17.057176],
[41.328182, 39.173333, 21.070505],
[54.407727, 34.104318, 18.771818],
[47.610076, 39.439545, 21.438409],
[39.435149, 41.479403, 21.004104],
[48.617348, 43.617955, 19.263258],
[40.073543, 44.194724, 33.921417],
[43.37292 , 43.792263, 21.067737],
[49.792403, 41.435581, 16.433953],
[30.020465, 44.29969 , 39.117984],
[36.909459, 51.947297, 34.687568],
[50.594462, 41.383154, 17.896538],
[34.186667, 18.693542, 9.682292],
[31.215455, 44.180909, 32.87 ],
[47.27686 , 41.973372, 12.40186 ],
[45.369773, 35.925909, 23.478258],
[35.943438, 45.519531, 28.02125 ],
[36.272348, 40.065152, 28.706894],
[44.501603, 46.598931, 29.535038],
[49.028308, 38.450462, 19.791538],
[34.235923, 41.231615, 14.153692],
[53.11048 , 39.00608 , 17.2064 ],
[49.28542 , 42.117786, 21.008931],
[52.895725, 38.620229, 19.972748],
[30.691797, 59.824844, 33.395938],
[34.949528, 50.177402, 36.325276],
[41.76596 , 49.865253, 30.071414],
[30.825938, 55.912578, 29.489922],
[38.948976, 44.460866, 27.345827],
[46.955854, 35.376179, 23.747561],
[45.053969, 48.950992, 24.374427],
[45.088504, 50.765276, 25.71252 ],
[42.444615, 45.780231, 24.745615],
[40.046439, 37.722197, 30.568258],
[52.535221, 35.290973, 15.793009],
[56.691163, 31.135698, 20.439651],
[48.709282, 44.728513, 19.387538],
[53.453713, 38.522321, 16.655907],
[31.450855, 45.490983, 40.583162],
[31.891474, 53.373368, 24.296316],
[49.077731, 45.670798, 17.449202],
[36.196989, 42.358817, 24.191613],
[38.91342 , 46.979524, 28.669524],
[60.225087, 28.902609, 14.337043],
[35.545054, 30.295484, 39.422796],
[56.815859, 38.419375, 13.961641],
[49.47 , 30.96626 , 23.053053],
[47.811742, 41.36447 , 20.816439],
[35.779512, 31.227724, 27.689919],
[55.974031, 33.09 , 21.330698],
[40.502021, 34.040957, 16.767979],
[38.78828 , 36.947204, 24.048172],
[52.082462, 39.402308, 16.628231],
[57.427596, 33.121827, 12.412404],
[39.528547, 42.353077, 23.810769],
[39.36155 , 40.205116, 26.27124 ],
[66.665564, 26.855564, 15.602331],
[48.587099, 26.988702, 9.948168],
[52.675729, 35.32625 , 16.510208],
[45.813043, 53.54587 , 30.403261],
[44.765313, 43.954375, 24.824609],
[42.643386, 33.345984, 14.643386],
[44.512578, 37.723594, 15.144922],
[51.830571, 44.304667, 10.049524],
[42.202857, 38.628681, 21.68989 ],
[57.241308, 33.237462, 16.194154],
[36.353298, 39.223723, 26.603617],
[35.566589, 48.679535, 29.923023],
[33.422105, 56.539263, 32.230842],
[31.7503 , 44.3443 , 39.1499 ],
[33.332362, 46.603622, 37.348898],
[41.929385, 41.960077, 17.815385],
[57.145227, 31.194545, 16.385 ],
[46.137348, 43.874697, 15.843258],
[49.331231, 34.458231, 23.982462],
[44.171154, 43.299846, 27.451538],
[49.322373, 41.494915, 14.199153],
[46.158281, 47.806719, 23.341641],
[48.355859, 35.778281, 15.101563],
[47.143474, 40.162316, 20.52 ],
[48.403333, 36.152326, 12.157829],
[40.281616, 35.341515, 20.805657],
[49.049323, 32.918647, 22.447594],
[47.737462, 41.528077, 19.694385],
[48.743333, 42.93187 , 17.984797],
[38.766702, 42.88383 , 22.15266 ],
[38.471406, 41.289922, 39.664375],
[54.911368, 42.269895, 11.263263],
[37.240989, 46.254286, 31.804286],
[46.319462, 38.176692, 14.143846],
[53.331333, 33.349333, 18.497333],
[51.006406, 36.351563, 22.484609],
[47.646364, 39.943939, 23.249848],
[32.683125, 54.681667, 35.906667],
[65.067447, 25.46617 , 14.787447],
[54.431756, 37.019847, 19.690305],
[35.834375, 44.595625, 23.930625],
[39.546441, 45.188475, 25.213644],
[41.114 , 41.884769, 19.713231],
[50.898163, 38.136837, 19.937347],
[45.669015, 44.523106, 20.548864],
[37.411719, 43.379531, 33.332422],
[31.541828, 47.688172, 28.897527],
[41.483701, 50.352283, 30.561496],
[36.813721, 52.722403, 14.703256],
[43.81828 , 42.931613, 17.494624],
[39.31561 , 30.73935 , 13.23122 ],
[63.995606, 26.921818, 9.305985],
[44.541328, 45.529453, 33.89125 ],
[35.420439, 41.05807 , 24.249737],
[45.162043, 34.678602, 22.719355],
[38.499688, 46.513828, 34.344766],
[55.293566, 49.822326, 20.592791],
[46.21 , 35.002222, 19.006667],
[54.151721, 32.722131, 11.041475],
[43.443893, 23.982901, 17.032443],
[40.120985, 27.149545, 23.975758],
[53.95 , 42.411488, 16.108347],
[48.796045, 46.014478, 14.642985],
[43.805615, 36.315846, 21.608308],
[51.161 , 44.074 , 17.386154],
[58.380294, 45.653922, 12.822843],
[40.345769, 37.003923, 17.285538],
[40.808939, 43.961591, 18.982424],
[57.962308, 33.373538, 17.684 ],
[35.569389, 38.904885, 31.624351],
[31.960417, 48.533125, 40.096458],
[71.696129, 27.57121 , 19.093548],
[51.537405, 36.465344, 23.008168],
[36.258913, 45.225652, 39.427283]])
Information about y:
array([7, 7, 4, 2, 4, 7, 7, 5, 7, 7, 3, 1, 1, 2, 8, 3, 4, 6, 2, 4, 2, 3,
3, 7, 2, 4, 8, 1, 4, 3, 8, 1, 2, 7, 4, 5, 1, 2, 2, 1, 6, 2, 6, 1,
1, 2, 6, 3, 1, 7, 2, 8, 6, 2, 8, 2, 1, 3, 8, 2, 8, 4, 2, 1, 8, 9,
1, 1, 2, 4, 6, 8, 8, 4, 9, 2, 8, 4, 4, 9, 5, 2, 4, 1, 2, 7, 2, 3,
2, 1, 2, 7, 2, 2, 1, 7, 7, 2, 4, 6, 1, 1, 1, 4, 2, 4, 2, 8, 7, 5,
9, 9, 8, 9, 7, 1, 8, 2, 4, 8, 8, 2, 2, 1, 2, 1, 6, 2, 4, 2, 1, 1,
1, 7, 3, 7, 4, 2, 1, 1], dtype=int64)
In this piece of code I am trying to assign a colour to every point in the plot:
fig = ipv.figure()
colores=['blue','red','green','cyan','yellow','orange','black','pink','brown','purple']
asignar=[]
for row in y:
asignar.append(colores[row])
scatter=ipv.scatter(X[:, 0], X[:, 1], X[:, 2],marker="sphere", color=asignar, size=2)
ipv.selector_default
ipv.show()
The last piece of code never finishes executing.
When the scatter's color is changed to a single fixed value, the plot is created:
fig = ipv.figure()
colores=['blue','red','green','cyan','yellow','orange','black','pink','brown','purple']
asignar=[]
for row in y:
asignar.append(colores[row])
scatter=ipv.scatter(X[:, 0], X[:, 1], X[:, 2],marker="sphere", color="red", size=2)
ipv.selector_default
ipv.show()
Plot
Suggestion from https://github.com/maartenbreddels/ipyvolume/issues/12#issuecomment-284685146 might work, something like:
import ipyvolume as ipv
import matplotlib
c = matplotlib.cm.afmhot(np.linspace(0, 1, len(y)))
ipv.quickscatter(X[:, 0], X[:, 1], X[:, 2],marker="sphere",color=c,size=2)

Plotting with more colors in matplotlib

I am trying to plot a scatter plot using matplotlib, but I am getting an "IndexError: pop from empty list" error and I am not sure how to fix it.
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import time
import itertools
d = {'5000cca229d10d09': {374851: 1}, '5000cca229cf3f8f': {372496:3},'5000cca229d106f9': {372496: 3, 372455: 2}, '5000cca229d0b3e4': {380904: 2, 380905: 1, 380906: 1, 386569: 1}, '5000cca229d098f8': {379296: 2, 379297: 2, 379299: 2, 379303: 1, 379306: 1, 379469: 1, 379471: 1, 379459: 1, 379476: 1, 379456: 4, 379609: 4}, '5000cca229d03957': {380160: 3, 380736: 3, 380162: 1, 380174: 1, 381072: 2, 379608: 2, 380568: 3, 380569: 1, 380570: 1, 379296: 3, 379300: 1, 380328: 3, 379306: 1, 380331: 1, 379824: 2, 379825: 1, 379827: 1, 380344: 1, 379836: 1, 379456: 3, 380737: 1, 380739: 1, 379462: 1, 379476: 1, 379992: 3, 379609: 1, 379994: 1, 379611: 1, 379621: 1, 380006: 1, 380904: 3, 380905: 1, 380907: 1, 380535: 3, 380536: 1, 380538: 1}, '5000cca229cf6d0b': {372768: 10, 372550: 15, 372616: 14, 372617: 20, 372653: 3, 372505: 2}, '5000cca229cec4f1': {372510: 132}}
colors = list("rgbcmyk")
for data_dict in d.values():
x = data_dict.keys()
#print x
#X= time.asctime(time.localtime(x))
y = data_dict.values()
#plt.scatter(x,y,color=colors.pop(),s = 60)
plt.scatter(x,y,color=colors.pop(),s = 90, marker='^')
plt.ylabel("Errors" , fontsize=18, color="Green")
plt.xlabel("Occured on",fontsize=18, color="Green")
plt.title("DDN23b", fontsize=25, color="Blue")
plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)
plt.xticks(rotation='vertical')
#plt.ylim(min(y),max(y))
#plt.grid()
#for x, y in dict(itertools.chain(*[item.items() for item in d.values()])).items():
# plt.text(x, y, time.strftime("%m/%d/%y, %H:%M:%S", time.localtime(x*3600)), ha='center', va='top', rotation='vertical', fontsize = '11', fontstyle = 'italic', color = '#844d4d')
plt.xticks(plt.xticks()[0], [time.strftime("%m/%d/%y, %H:%M:%S", time.localtime(item)) for item in plt.xticks()[0]*3600])
plt.legend(d.keys())
mng = plt.get_current_fig_manager()
mng.resize(*mng.window.maxsize())
plt.subplots_adjust(bottom=.24,right=.98,left=0.03,top=.89)
plt.grid()
plt.show()
I have several data sets for d, and d is a dictionary. When the data set is small, it works without any errors. When the data set is large, it runs out of colors. How do I add more colors to the list so that every key in "d" gets its own color?
Feel free to edit my code and make suggestions.
Colormaps are callable. When passed a float between 0 and 1, a colormap returns an RGBA color:
In [73]: jet = plt.cm.jet
In [74]: jet(0.5)
Out[74]: (0.49019607843137247, 1.0, 0.47754585705249841, 1.0)
So, you could generate len(d) number of colors by passing the NumPy array np.linspace(0, 1, len(d)) to the colormap:
jet = plt.cm.jet
colors = jet(np.linspace(0, 1, len(d)))
The colors selected will then be equally spaced along the colormap gradient.
import matplotlib.pyplot as plt
import numpy as np
import time
d = {'5000cca229d10d09': {374851: 1}, '5000cca229cf3f8f': {372496:3},'5000cca229d106f9': {372496: 3, 372455: 2}, '5000cca229d0b3e4': {380904: 2, 380905: 1, 380906: 1, 386569: 1}, '5000cca229d098f8': {379296: 2, 379297: 2, 379299: 2, 379303: 1, 379306: 1, 379469: 1, 379471: 1, 379459: 1, 379476: 1, 379456: 4, 379609: 4}, '5000cca229d03957': {380160: 3, 380736: 3, 380162: 1, 380174: 1, 381072: 2, 379608: 2, 380568: 3, 380569: 1, 380570: 1, 379296: 3, 379300: 1, 380328: 3, 379306: 1, 380331: 1, 379824: 2, 379825: 1, 379827: 1, 380344: 1, 379836: 1, 379456: 3, 380737: 1, 380739: 1, 379462: 1, 379476: 1, 379992: 3, 379609: 1, 379994: 1, 379611: 1, 379621: 1, 380006: 1, 380904: 3, 380905: 1, 380907: 1, 380535: 3, 380536: 1, 380538: 1}, '5000cca229cf6d0b': {372768: 10, 372550: 15, 372616: 14, 372617: 20, 372653: 3, 372505: 2}, '5000cca229cec4f1': {372510: 132}}
# Sample one evenly spaced colour per key of d from the "jet" colormap, so the
# palette grows with the data instead of running out like a fixed list.
jet = plt.cm.jet
colors = jet(np.linspace(0, 1, len(d)))

fig, ax = plt.subplots()
for color, data_dict in zip(colors, d.values()):
    # Materialise the dict views so plain sequences are handed to scatter.
    x = list(data_dict.keys())
    y = list(data_dict.values())
    ax.scatter(x, y, color=color, s=90, marker='^')

plt.ylabel("Errors", fontsize=18, color="Green")
plt.xlabel("Occured on", fontsize=18, color="Green")
plt.title("DDN23b", fontsize=25, color="Blue")
ax.get_xaxis().get_major_formatter().set_useOffset(False)
plt.xticks(rotation='vertical')

# The tick values are multiplied by 3600 before being passed to
# time.localtime, i.e. they are treated as hours and converted to seconds.
tick_positions = plt.xticks()[0]
plt.xticks(tick_positions,
           [time.strftime("%m/%d/%y, %H:%M:%S", time.localtime(item))
            for item in tick_positions * 3600])

# One legend entry per key of d, in the order the scatter calls were made.
plt.legend(list(d.keys()))
plt.subplots_adjust(bottom=.24, right=.98, left=0.03, top=.89)
plt.grid()
plt.show()

My python program is writing source code to a file instead of proper output

I wanted to make a text file concatenation/duplicate line removal program for some practice, and I'm just about ready to turn my computer off and wait for the singularity because I have no worldly idea as to why this code is behaving the way it is.
The kicker is I had pretty much made it. I had the script done, it was working fine. It merged files, the duplicate detection worked... Then I added comments, and it all went crazy. It used to go through a list of text files, open each of them, and read their lines into the merged file. For testing I have two files, wl1.txt and wl2.txt. These are their contents:
wl1.txt:
test
test1
word
word1
wordword
entry
python
random
wl2.txt:
yellow
red
bluw
test
test1
random
black
Black
This is the snippet of code that I am running to isolate this incident:
fileList = ['C:\lists\wl\wl1.txt','C:\lists\wl\wl2.txt']
outfile = open("C:\lists\wl\wlmerge.txt",'w+')
for fname in fileList:
infile = open(fname,"r")
for line in infile:
print line
outfile.write(line)
print outfile.read()
Before running the code wlmerge.txt did not exist and there are no other lines in the input text files than the ones I've listed. And this, is the output I get:
test
test1
word
word1
wordword
entry
python
random
mat__': 1, 'runit': 1, 'remove_selection': 1, '__str__': 1, '_file_line_helper': 1, '_asktabwidth': 1, '_filename_to_unicode': 1, 'open_stack_viewer': 1, 'get_region': 1, 'cut': 1, 'open_module': 1, 'showerror': 1, '__class__': 1, 'smart_indent_event': 1, 'set_status_bar': 1, 'about_dialog': 1, 'indent_region_event': 1, 'load_extension': 1, 'set_region': 1, '_close': 1, 'cancel_callback': 1, 'postwindowsmenu': 1, '__subclasshook__': 1, 'newline_and_indent_event': 1, 'toggle_debugger': 1, 'saved_change_hook': 1, 'eof_callback': 1, 'get_warning_stream': 1, 'get_standard_extension_names': 1, 'stop_readline': 1, 'guess_indent': 1, 'ResetFont': 1, 'rmenu_check_paste': 1, 'replace_event': 1, 'unload_extensions': 1, 'del_word_right': 1, 'close_debugger': 1, '_EditorWindow__extra_help_callback': 1, 'python_docs': 1, 'fill_menus': 1, 'flush': 1, 'close': 1, 'center_insert_event': 1, '__setattr__': 1, 'set_notabs_indentwidth': 1, 'help_dialog': 1, 'set_saved': 1, 'get_selection_indices': 1, 'open_debugger': 1, 'tabify_region_event': 1, 'comment_region_event': 1, 'get_var_obj': 1, 'find_selection_event': 1, '_rmcolorizer': 1, 'goto_line_event': 1, 'load_standard_extensions': 1, 'reset_undo': 1, 'long_title': 1, 'paste': 1, 'close2': 1, 'reset_help_menu_entries': 1, 'set_indentation_params': 1, 'open_class_browser': 1, 'endexecuting': 1, 'rmenu_check_cut': 1, '__delattr__': 1, '_addcolorizer': 1, '__repr__': 1, 'close_hook': 1, 'home_callback': 1, 'right_menu_event': 1, 'getlineno': 1, 'apply_bindings': 1, 'restart_shell': 1, '_make_blanks': 1, 'get_geometry': 1, 'ApplyKeybindings': 1, 'get_tabwidth': 1, 'ResetColorizer': 1, 'open_path_browser': 1, 'filename_change_hook': 1, '_build_char_in_string_func': 1, 'isatty': 1, 'find_event': 1, 'set_close_hook': 1, '__reduce__': 1, 'find_in_files_event': 1, 'untabify_region_event': 1, 'new_callback': 1, 'getvar': 1, 'copy': 1, 'rmenu_check_copy': 1, 'center': 1, 'writelines': 1, 'recall': 1, 'load_extensions': 1, 'showprompt': 1, 
'close_event': 1, 'reindent_to': 1, 'askinteger': 1, '__hash__': 1, 'RemoveKeybindings': 1, 'dedent_region_event': 1, 'linefeed_callback': 1, 'is_char_in_string': 1, '__getattribute__': 1, 'move_at_edge_if_selection': 1, 'beginexecuting': 1, 'enter_callback': 1, 'short_title': 1, 'getwindowlines': 1, 'smart_backspace_event': 1, '__sizeof__': 1, 'set_tabwidth': 1, 'find_again_event': 1, '__init__': 1, 'del_word_left': 1, 'get_saved': 1, '__reduce_ex__': 1, '__new__': 1, 'select_all': 1, 'gotoline': 1, 'view_restart_mark': 1, 'change_indentwidth_event': 1, 'write': 1, 'set_debugger_indicator': 1, 'config_dialog': 1, 'set_warning_stream': 1, 'setvar': 1, 'createmenubar': 1, 'begin': 1, 'toggle_tabs_event': 1, 'askyesno': 1, 'ispythonsource': 1, 'resetoutput': 1, 'goto_file_line': 1, 'readline': 1, 'toggle_jit_stack_viewer': 1, 'make_rmenu': 1, '_EditorWindow__recent_file_callback': 1, 'uncomment_region_event': 1, 'update_recent_files_list': 1, 'set_line_and_column': 1})
find_again_event'
p118
I1
sS'__init__'
p119
I1
sS'del_word_left'
p120
I1
sS'askyesno'
p121
I1
sS'__reduce_ex__'
p122
I1
sS'__new__'
p123
I1
sS'getlineno'
p124
I1
sS'__format__'
p125
I1
sS'view_restart_mark'
p126
I1
sS'change_indentwidth_event'
p127
I1
sS'write'
p128
I1
sS'set_debugger_indicator'
p129
I1
sS'config_dialog'
p130
I1
sS'set_warning_stream'
p131
I1
sS'setvar'
p132
I1
sS'createmenubar'
p133
I1
sS'begin'
p134
I1
sS'toggle_tabs_event'
p135
I1
sS'help_dialog'
p136
I1
sS'ispythonsource'
p137
I1
sS'resetoutput'
p138
I1
sS'goto_file_line'
p139
I1
sS'readline'
p140
I1
sS'toggle_jit_stack_viewer'
p141
I1
sS'make_rmenu'
p142
I1
sS'center'
p143
I1
sS'uncomment_region_event'
p144
I1
sS'short_title'
p145
I1
sS'set_line_and_column'
p146
I1
stp147
tp148
.
Any help would be greatly appreciated.
You're opening the outfile with a mode of w+, which will truncate the output. You should get rid of the print outfile.read() line as the outfile is already at the end of the buffer, so read() is not doing what you think it should do.
If you really want to read the outfile after you have written to it (but still in your loop over files), you'll need to rewind to the start of the file. You can do this with outfile.seek(0).
If you want to open outfile as both read and write without truncation, you should use r+ as the mode.
Just delete the print outfile.read() line:
# Input files whose lines are concatenated into wlmerge.txt, in this order.
fileList = ['wl1.txt', 'wl2.txt']

# 'w+' creates the merge file, or truncates it if it already exists.
# The with-blocks close every file handle once its lines have been copied.
with open("wlmerge.txt", 'w+') as outfile:
    for fname in fileList:
        with open(fname, "r") as infile:
            for line in infile:
                # Echo each line and append it to the merged file.
                print(line)
                outfile.write(line)
This works fine for me.
Edit
To read it after writing:
# Close the handle used for writing first, so everything written so far is
# flushed to disk before the file is reopened for reading.
outfile.close()
with open('wlmerge.txt', 'r') as f:
    read_data = f.read()
print(read_data)

Categories

Resources