I have a nested list e.g., ls=[((0,3),(1,0),(2,2),(3,0)),((0,0),(1,3),(2,2),(3,0))].
And I would like to have the following modified list:
modified_ls=[((0,0),(0,0),(0,0),(1,0),(2,0),(2,0),(3,0)),((0,0),(1,0),(1,0),(1,0),(2,0),(2,0),(3,0))]
Here, each element (x, 0) in modified_ls is repeated N times (when N != 0), where N is taken from the corresponding element (x, N) in ls; an element with N == 0 is kept once.
Here is what I do (quite stupid):
# Expand every (x, N) pair into N copies of (x, 0); a pair whose N is 0 is
# kept once, unchanged.
ls = [((0, 3), (1, 0), (2, 2), (3, 0)), ((0, 0), (1, 3), (2, 2), (3, 0))]
modified_temp_ls = []
for ii in ls:
    for jj in ii:
        temp = jj[1]
        if temp == 0:
            modified_temp_ls.append(jj)
        else:
            # Append (x, 0) once per unit of the repeat count.
            while temp:
                modified_temp_ls.append((jj[0], 0))
                temp -= 1
# Everything was collected into one flat list, so cut it back into two halves.
# NOTE: this only reproduces the grouping when ls holds exactly two
# sub-tuples that expand to the same length.
ls2 = modified_temp_ls[0:int(len(modified_temp_ls) / 2)]
ls3 = modified_temp_ls[int(len(modified_temp_ls) / 2):int(len(modified_temp_ls))]
modified_ls = []
modified_ls.append(tuple(ls2))
modified_ls.append(tuple(ls3))
Is there a simpler way to do this (e.g., without using ls2 and ls3, etc.)? Thank you very much in advance for any suggestions!
This is just list manipulation. Generate the individual sequences, then use sum to combine them into a single list.
ls = [((0, 3), (1, 0), (2, 2), (3, 0)), ((0, 0), (1, 3), (2, 2), (3, 0))]
accum = []
for k in ls:
    # Build one small list per (i, n) pair -- max(1, n) keeps a single copy
    # when n is 0 -- then let sum(..., []) concatenate them into one list.
    accum.append(sum([[(i, 0)] * max(1, n) for i, n in k], []))
print(accum)
Output:
[[(0, 0), (0, 0), (0, 0), (1, 0), (2, 0), (2, 0), (3, 0)], [(0, 0), (1, 0), (1, 0), (1, 0), (2, 0), (2, 0), (3, 0)]]
Or for the obligatory one liner:
ls = [((0, 3), (1, 0), (2, 2), (3, 0)), ((0, 0), (1, 3), (2, 2), (3, 0))]
# One flat list per group: each (x, count) becomes max(1, count) copies of (x, 0).
accum = [sum(([(x, 0)] * max(1, count) for x, count in group), []) for group in ls]
I would like to store the output of a for loop in a text file, but the result is not what I want: only the last element ends up in the file every time.
Please take a look at the second for loop (I have tried to write the output of the for loop to a text file, but it always seems to save only the last element)...
print('\nTopic id, number of documents, list of documents with probability and represented topic words: ')
dic_topic_doc = {}
# for doc in doc_clean:
for index, doc in enumerate(doc_clean):
    bow = dictionary.doc2bow(doc)
    # get topic distribution of the ldamodel
    t = ldamodel.get_document_topics(bow)
    # sort the probability value in descending order to extract the top
    # contributing topic id
    sorted_t = sorted(t, key=lambda x: x[1], reverse=True)
    # collect (filename, sorted topics) for the current document
    arr = []
    r = filenames[index], sorted_t
    arr += [r]
    # print(filenames[index], sorted_t)
    # open the output file for this document and write the collected row
    text_file = open("text_file1.txt", "w")
    for item in arr:
        text_file.write("%s\n" % str(item))
    text_file.close()
    # get the top scoring item
    top_item = sorted_t.pop(0)
    # create dictionary and keep key as topic id and filename
    # and probability in tuple as value
    dic_topic_doc.setdefault(top_item[0], []).append((filenames[index], top_item[1]))
The output of the arr is:
[('001.txt', [(4, 0.71602935), (3, 0.28154704)])]
[('002.txt', [(1, 0.7049297), (3, 0.29243866)])]
[('003.txt', [(1, 0.99487615)])]
[('004.txt', [(3, 0.99692315)])]
[('005.txt', [(3, 0.9950977)])]
[('006.txt', [(3, 0.9928176)])]
[('007.txt', [(3, 0.9953267)])]
[('008.txt', [(1, 0.7267725), (3, 0.27005684)])]
[('009.txt', [(4, 0.99464)])]
[('010.txt', [(1, 0.9942703)])]
[('011.txt', [(4, 0.66636235), (3, 0.32503912)])]
[('012.txt', [(3, 0.9957714)])]
[('013.txt', [(3, 0.9956188)])]
[('014.txt', [(3, 0.9970782)])]
[('015.txt', [(0, 0.9260712), (1, 0.07222312)])]
[('016.txt', [(3, 0.9941127)])]
[('017.txt', [(3, 0.9938843)])]
[('018.txt', [(1, 0.9922442)])]
[('019.txt', [(3, 0.99532056)])]
[('020.txt', [(1, 0.9978331)])]
[('021.txt', [(0, 0.79601324), (3, 0.20101906)])]
[('022.txt', [(3, 0.9956737)])]
[('023.txt', [(3, 0.77211946), (0, 0.22313568)])]
[('024.txt', [(1, 0.70796424), (3, 0.28902704)])]
[('025.txt', [(1, 0.99687904)])]
[('026.txt', [(1, 0.9956988)])]
[('027.txt', [(1, 0.8199397), (3, 0.1765291)])]
[('028.txt', [(4, 0.80725664), (3, 0.18847291)])]
[('029.txt', [(1, 0.9965614)])]
[('030.txt', [(3, 0.57492805), (1, 0.4226808)])]
[('031.txt', [(3, 0.99596083)])]
[('032.txt', [(3, 0.9952822)])]
[('033.txt', [(2, 0.70318085), (3, 0.2922277)])]
[('034.txt', [(1, 0.89127207), (3, 0.10545795)])]
[('035.txt', [(1, 0.722026), (3, 0.27340224)])]
[('036.txt', [(1, 0.9960558)])]
[('037.txt', [(2, 0.6491061), (3, 0.348281)])]
[('038.txt', [(3, 0.9922011)])]
[('039.txt', [(1, 0.9939154)])]
[('040.txt', [(3, 0.994332)])]
[('041.txt', [(2, 0.55159), (3, 0.44460982)])]
[('042.txt', [(2, 0.70692), (3, 0.2888305)])]
[('043.txt', [(2, 0.99319774)])]
[('044.txt', [(3, 0.9966152)])]
[('045.txt', [(4, 0.8118485), (3, 0.18499091)])]
[('046.txt', [(3, 0.99555445)])]
[('047.txt', [(0, 0.69498456), (3, 0.301216)])]
[('048.txt', [(4, 0.5628694), (3, 0.4332467)])]
[('049.txt', [(0, 0.9977888)])]
[('050.txt', [(4, 0.71272886), (3, 0.28423065)])]
[('051.txt', [(0, 0.9960364)])]
[('052.txt', [(1, 0.75808185), (4, 0.23720185)])]
[('053.txt', [(4, 0.9951011)])]
[('054.txt', [(1, 0.9947669)])]
[('055.txt', [(4, 0.99493676)])]
[('056.txt', [(2, 0.8089563), (4, 0.18770455)])]
[('057.txt', [(2, 0.99461764)])]
[('058.txt', [(0, 0.99397254)])]
[('059.txt', [(0, 0.99506336)])]
[('060.txt', [(4, 0.5311487), (1, 0.46360484)])]
[('061.txt', [(4, 0.9979783)])]
[('062.txt', [(4, 0.99500084)])]
[('063.txt', [(4, 0.5147298), (1, 0.48232165)])]
[('064.txt', [(0, 0.9976079)])]
[('065.txt', [(0, 0.75504255), (4, 0.23867798)])]
[('066.txt', [(0, 0.55720884), (4, 0.43956935)])]
[('067.txt', [(4, 0.99436694)])]
[('068.txt', [(4, 0.59490424), (1, 0.40078953)])]
[('069.txt', [(0, 0.9960368)])]
[('070.txt', [(2, 0.5861434), (4, 0.40975693)])]
[('071.txt', [(3, 0.59358937), (4, 0.40286723)])]
[('072.txt', [(4, 0.8263165), (1, 0.170669)])]
[('073.txt', [(4, 0.9940411)])]
[('074.txt', [(1, 0.7025927), (4, 0.29399806)])]
[('075.txt', [(0, 0.9965689)])]
[('076.txt', [(4, 0.9947142)])]
[('077.txt', [(4, 0.9954025)])]
[('078.txt', [(0, 0.96838295), (2, 0.028607361)])]
[('079.txt', [(4, 0.9937655)])]
[('080.txt', [(4, 0.99437577)])]
[('081.txt', [(0, 0.9953118)])]
[('082.txt', [(0, 0.9963087)])]
[('083.txt', [(1, 0.60066926), (4, 0.39384273)])]
[('084.txt', [(0, 0.99413854)])]
[('085.txt', [(1, 0.9960935)])]
[('086.txt', [(0, 0.99625784)])]
[('087.txt', [(0, 0.6497531), (4, 0.34491497)])]
[('088.txt', [(1, 0.70258677), (4, 0.2940039)])]
[('089.txt', [(0, 0.99131155)])]
[('090.txt', [(1, 0.91544366), (0, 0.08105935)])]
[('091.txt', [(4, 0.731707), (0, 0.26340982)])]
[('092.txt', [(0, 0.991419)])]
[('093.txt', [(0, 0.99421775)])]
[('094.txt', [(2, 0.99614394)])]
[('095.txt', [(0, 0.73367894), (4, 0.26461697)])]
[('096.txt', [(0, 0.99521035)])]
[('097.txt', [(4, 0.9960171)])]
[('098.txt', [(4, 0.9937229)])]
[('099.txt', [(4, 0.9939455)])]
[('100.txt', [(4, 0.99591196)])]
[('101.txt', [(3, 0.9976063)])]
[('102.txt', [(1, 0.99697727)])]
[('103.txt', [(2, 0.9963148)])]
[('104.txt', [(4, 0.8498221), (3, 0.14795008)])]
[('105.txt', [(0, 0.5934393), (1, 0.4045119)])]
[('106.txt', [(3, 0.56688505), (2, 0.4309479)])]
[('107.txt', [(3, 0.89017695), (2, 0.1069556)])]
[('108.txt', [(1, 0.9921692)])]
[('109.txt', [(3, 0.6526474), (0, 0.34404448)])]
[('110.txt', [(2, 0.6523364), (1, 0.23732765), (3, 0.10890786)])]
[('111.txt', [(2, 0.9984435)])]
[('112.txt', [(1, 0.99638546)])]
[('113.txt', [(2, 0.99526036)])]
[('114.txt', [(3, 0.9949771)])]
[('115.txt', [(4, 0.8333082), (1, 0.13397394), (3, 0.031519413)])]
[('116.txt', [(2, 0.93116885), (1, 0.06670692)])]
[('117.txt', [(1, 0.99436975)])]
[('118.txt', [(1, 0.9965262)])]
[('119.txt', [(3, 0.82268876), (1, 0.17507821)])]
[('120.txt', [(1, 0.9963602)])]
[('121.txt', [(2, 0.9975567)])]
[('122.txt', [(2, 0.9975837)])]
[('123.txt', [(0, 0.67087364), (1, 0.3223502)])]
[('124.txt', [(2, 0.99769515)])]
[('125.txt', [(1, 0.99586403)])]
[('126.txt', [(2, 0.80726296), (1, 0.19040845)])]
[('127.txt', [(3, 0.76902544), (2, 0.22932427)])]
[('128.txt', [(3, 0.9977924)])]
[('129.txt', [(3, 0.99410117)])]
[('130.txt', [(3, 0.99703735)])]
[('131.txt', [(1, 0.9959712)])]
[('132.txt', [(2, 0.6769866), (4, 0.31980133)])]
[('133.txt', [(1, 0.9948419)])]
[('134.txt', [(0, 0.5198297), (1, 0.33842823), (2, 0.14010021)])]
[('135.txt', [(0, 0.44838846), (3, 0.32033986), (1, 0.23010626)])]
[('136.txt', [(1, 0.99709207)])]
[('137.txt', [(0, 0.8783441), (2, 0.0889), (1, 0.03159054)])]
[('138.txt', [(1, 0.99661326)])]
[('139.txt', [(0, 0.6388813), (1, 0.35774702)])]
[('140.txt', [(2, 0.993027)])]
[('141.txt', [(1, 0.99740076)])]
[('142.txt', [(1, 0.99737215)])]
[('143.txt', [(1, 0.9967778)])]
[('144.txt', [(4, 0.6482304), (2, 0.3464205)])]
[('145.txt', [(3, 0.991145)])]
[('146.txt', [(2, 0.99620616)])]
[('147.txt', [(2, 0.99727434)])]
[('148.txt', [(1, 0.9970219)])]
[('149.txt', [(1, 0.99663305)])]
[('150.txt', [(0, 0.6730801), (2, 0.32252583)])]
[('151.txt', [(2, 0.71864104), (3, 0.27687418)])]
[('152.txt', [(2, 0.5830273), (0, 0.41458392)])]
[('153.txt', [(3, 0.8325644), (2, 0.16258276)])]
[('154.txt', [(2, 0.6469322), (1, 0.34925482)])]
[('155.txt', [(2, 0.99509275)])]
[('156.txt', [(2, 0.9953295)])]
[('157.txt', [(2, 0.99550176)])]
[('158.txt', [(2, 0.9961249)])]
[('159.txt', [(2, 0.610686), (0, 0.38501245)])]
[('160.txt', [(2, 0.99716145)])]
[('161.txt', [(3, 0.48505446), (2, 0.36028314), (0, 0.15067576)])]
[('162.txt', [(4, 0.49675527), (2, 0.49561986)])]
[('163.txt', [(4, 0.99753934)])]
[('164.txt', [(1, 0.6566721), (2, 0.33890736)])]
[('165.txt', [(2, 0.5288824), (3, 0.46510658)])]
[('166.txt', [(1, 0.64638895), (2, 0.350259)])]
[('167.txt', [(2, 0.6616886), (3, 0.33044046)])]
[('168.txt', [(2, 0.9941413)])]
[('169.txt', [(2, 0.58227646), (0, 0.4137176)])]
[('170.txt', [(2, 0.99611557)])]
[('171.txt', [(0, 0.9897187)])]
[('172.txt', [(2, 0.9950201)])]
[('173.txt', [(4, 0.9910078)])]
[('174.txt', [(0, 0.8987627), (2, 0.09535792)])]
[('175.txt', [(2, 0.399172), (4, 0.3129197), (1, 0.2825416)])]
[('176.txt', [(2, 0.99535024)])]
[('177.txt', [(2, 0.51980865), (0, 0.4761103)])]
[('178.txt', [(2, 0.9942798)])]
[('179.txt', [(0, 0.9978292)])]
[('180.txt', [(0, 0.99799275)])]
[('181.txt', [(0, 0.9971965)])]
[('182.txt', [(0, 0.99719584)])]
[('183.txt', [(2, 0.9947798)])]
[('184.txt', [(0, 0.9946292)])]
[('185.txt', [(2, 0.99375385)])]
[('186.txt', [(0, 0.97043765), (2, 0.026055241)])]
[('187.txt', [(2, 0.6645889), (4, 0.33031783)])]
[('188.txt', [(2, 0.5876885), (1, 0.40868264)])]
[('189.txt', [(1, 0.6894244), (2, 0.30670562)])]
[('190.txt', [(0, 0.9950181)])]
[('191.txt', [(1, 0.9896501)])]
[('192.txt', [(0, 0.99681157)])]
[('193.txt', [(1, 0.6064344), (2, 0.3890222)])]
[('194.txt', [(1, 0.6191368), (2, 0.37391865)])]
[('195.txt', [(2, 0.99624527)])]
[('196.txt', [(2, 0.82650113), (3, 0.16979064)])]
[('197.txt', [(0, 0.69186723), (2, 0.30481166)])]
[('198.txt', [(2, 0.99368984)])]
[('199.txt', [(2, 0.99674624)])]
[('200.txt', [(2, 0.533913), (0, 0.46221077)])]
[('201.txt', [(0, 0.9968048)])]
[('202.txt', [(2, 0.99731094)])]
[('203.txt', [(2, 0.68663925), (4, 0.3111823)])]
[('204.txt', [(0, 0.99631995)])]
[('205.txt', [(0, 0.99645954)])]
[('206.txt', [(2, 0.9978021)])]
[('207.txt', [(2, 0.9333098), (1, 0.063321896)])]
[('208.txt', [(0, 0.9977464)])]
[('209.txt', [(4, 0.5554451), (2, 0.43833327)])]
[('210.txt', [(1, 0.9974662)])]
[('211.txt', [(1, 0.9972499)])]
[('212.txt', [(0, 0.9977275)])]
[('213.txt', [(0, 0.99759126)])]
[('214.txt', [(2, 0.99620396)])]
[('215.txt', [(0, 0.9966316)])]
[('216.txt', [(0, 0.99804056)])]
[('217.txt', [(0, 0.65975994), (1, 0.33773333)])]
[('218.txt', [(4, 0.99706537)])]
[('219.txt', [(0, 0.99597585)])]
[('220.txt', [(0, 0.9973574)])]
[('221.txt', [(2, 0.9961178)])]
[('222.txt', [(0, 0.7885542), (1, 0.20995435)])]
[('223.txt', [(4, 0.94501954), (0, 0.052957233)])]
[('224.txt', [(0, 0.99597615)])]
[('225.txt', [(0, 0.99772716)])]
[('226.txt', [(1, 0.9972503)])]
[('227.txt', [(1, 0.9962298)])]
[('228.txt', [(0, 0.99788153)])]
[('229.txt', [(0, 0.99829)])]
[('230.txt', [(0, 0.9957878)])]
[('231.txt', [(2, 0.9967612)])]
[('232.txt', [(0, 0.9935971)])]
[('233.txt', [(4, 0.9954175)])]
[('234.txt', [(0, 0.99781704)])]
[('235.txt', [(0, 0.90009326), (4, 0.09837005)])]
[('236.txt', [(0, 0.9963427)])]
[('237.txt', [(0, 0.99463385)])]
[('238.txt', [(0, 0.7446245), (1, 0.24992703)])]
[('239.txt', [(3, 0.58633035), (0, 0.2825646), (1, 0.1281509)])]
[('240.txt', [(0, 0.99782217)])]
[('241.txt', [(1, 0.8480159), (0, 0.14756873)])]
[('242.txt', [(3, 0.9947073)])]
[('243.txt', [(2, 0.90908307), (0, 0.08884188)])]
[('244.txt', [(4, 0.9128273), (0, 0.085246615)])]
[('245.txt', [(3, 0.68367517), (2, 0.22646488), (4, 0.08881564)])]
[('246.txt', [(0, 0.9964561)])]
[('247.txt', [(0, 0.53860736), (1, 0.4575816)])]
[('248.txt', [(1, 0.99430263)])]
[('249.txt', [(2, 0.983931), (3, 0.013103891)])]
[('250.txt', [(1, 0.9961023)])]
Context managers automatically release their resources when execution leaves the with block. open() returns a context manager and can be used like this:
# The file is closed automatically when the with block exits, even if a
# write raises.
with open('text_file1.txt', 'w') as f_out:
    for item in arr:
        f_out.write("%s\n" % str(item))
In your code, you call text_file.close() which will close the file handler after only writing the first entry. How you don't error out after the first write() statement is beyond me.
If you want to keep your current code without importing extra libraries:
print('\nTopic id, number of documents, list of documents with probability and represented topic words: ')
dic_topic_doc = {}
# for doc in doc_clean:
for index, doc in enumerate(doc_clean):
    bow = dictionary.doc2bow(doc)
    # get topic distribution of the ldamodel
    t = ldamodel.get_document_topics(bow)
    # sort the probability value in descending order to extract the top
    # contributing topic id
    sorted_t = sorted(t, key=lambda x: x[1], reverse=True)
    # collect (filename, sorted topics) for the current document
    arr = []
    r = filenames[index], sorted_t
    arr += [r]
    # print(filenames[index], sorted_t)
    # Just changing the open mode from w(rite) to a(ppend) does the trick.
    # NOTE: append mode also keeps whatever an earlier run left in the file.
    with open("text_file1.txt", "a") as text_file:
        for item in arr:
            text_file.write("%s\n" % str(item))
    # get the top scoring item
    top_item = sorted_t.pop(0)
    # create dictionary and keep key as topic id and filename
    # and probability in tuple as value
    dic_topic_doc.setdefault(top_item[0], []).append((filenames[index], top_item[1]))
I also suggest reading the relevant part of the Python documentation.
from collections import defaultdict as ddict

# Missing topic ids start out as an empty list, so no setdefault is needed.
dic_topic_doc = ddict(list)
# Opening in "w" mode and closing straight away empties the file, so the
# appends below start from a clean file instead of leaking an open handle.
with open("text_file1.txt", "w"):
    pass
with open('text_file1.txt', 'a') as f:
    for item in arr:
        f.write(f"{item}\n")
top_item = sorted_t.pop(0)
# Group by topic id: append to the list stored under that key (the
# defaultdict itself has no append method).
dic_topic_doc[top_item[0]].append((filenames[index], top_item[1]))
Try this.
Just write the whole array in a single operation, so you do not overwrite your file after writing each element. You can use the with open() method as #SilverSlash commented, or the numpy.savetxt method, which also works for lists of integers or floats.
import numpy
# Writes all of arr to 'arr.out' in one call, comma-separated.
# NOTE(review): savetxt expects 1-D or 2-D numeric array-like data -- confirm
# that arr has that shape before relying on this.
numpy.savetxt('arr.out', arr, delimiter=',')
Edit
In your second loop, remove the lines that handle the writing part and add numpy.savetxt('arr.out', arr, delimiter=',') at the end of the script. Remember to import numpy at the beginning of the script.
for item in arr:
    # get the top scoring item
    top_item = sorted_t.pop(0)
    # create dictionary and keep key as topic id and filename
    # and probability in tuple as value
    dic_topic_doc.setdefault(top_item[0], []).append((filenames[index], top_item[1]))
# write everything once, after the loop has finished
numpy.savetxt('arr.out', arr, delimiter=',')
Edit 2
It looks like your list arr is a list of tuples containing nested lists, so numpy.savetxt will not work. Instead use
for item in arr:
    # get the top scoring item
    top_item = sorted_t.pop(0)
    # create dictionary and keep key as topic id and filename
    # and probability in tuple as value
    dic_topic_doc.setdefault(top_item[0], []).append((filenames[index], top_item[1]))
# write every collected row once, after the loop; the file is opened a single
# time, so earlier rows are not overwritten
with open('arr.txt', 'w') as f:
    for item in arr:
        f.write("%s\n" % str(item))