How to select PID values with regular expressions? - python

#!/usr/bin/env python3.7
import subprocess
import re
import os
def main():
    """Run ``ps aux`` and print its output as text.

    Raises:
        subprocess.CalledProcessError: if ``ps`` exits with a non-zero status.
    """
    # check_output returns bytes; decode them so the process table is
    # printed as readable text rather than as a bytes repr.
    raw_output = subprocess.check_output(["ps", "aux"])
    print(raw_output.decode())


if __name__ == "__main__":
    main()
I am trying to extract all PID values and put them in a separate list, but I am unable to extract them.

to extract all PID values and put them in a separate list
To extract only the PID numbers, change the ps command to use a user-defined output format
(-o format: specify a user-defined format) to limit the output fields.
import subprocess
import os
def main():
    """Print the PIDs of all processes as a list of strings.

    Raises:
        subprocess.CalledProcessError: if ``ps`` exits with a non-zero status.
    """
    # "-o pid=" restricts the output to the PID column, and the empty header
    # text after "=" suppresses the header line. Unlike the procps-only
    # "--no-headers" long option, this form is also accepted by BSD/macOS ps.
    output = subprocess.check_output(["ps", "ax", "-o", "pid="])
    # Only PIDs and whitespace remain, so a plain split() yields one PID
    # string per process.
    pids = output.decode().split()
    print(pids)


if __name__ == "__main__":
    main()
Sample output:
['1', '2', '3', '4', '6', '8', '9', '10', '11', '12', '13', '14', '16', '17',
'18', '19', '20', '21', '23', '24', '25', '26', '27', '28', '30', '31', '32',
'33', '34', '35', '37', '38', '39', '40', '41', '42', '44', '45', '46', '47',
'48', '49', '51', '52', '53', '54', '55', '56', '58', '59', '60', '61', '62',
'63', '65', '66', '67', '68', '69', '70', '72', '73', '74', '75', '76', '77',
'79', '80', '81', '82', '83', '84', '86', '87', '88', '89', '90', '91', '93',
'94', '95', '96', '97', '100', '101', '102', '103', '104', '105', '193', '194',
'195', '199', '200', '202', '205', '206', '209', '210', '211', '212', '213',
'214', '220', '231', '248', '287', '288', '289', '290', '291', '296', '297',
'300', '307', '314', '315', '321', '324', '326', '328', '341', '344', '347',
'348', '357', '361', '362', '363', '366', '432', '483', '488', '494', '516',
'517', '518', '519', '520', '521', '522', '523', '524', '525', '526', '527',
'528', '529', '604', '620', '621', '624', '625', '627', '636', '637', '650',
'651', '743', '744', '752', '753', '770', '771', '785', '786', '791', '792',
'793', '794', '795', '796', '797', '798', '829', '838', '848', '853', '854',
'855', '856', '857', '858', '859', '860', '865', '896', '900', '901', '911',
'912', '921', '936', '937', '940', '944', '960', '964', '968', '970', '975',
'984', '989', '991', '995', '999', '1001', '1016', '1025', '1030', '1033',
'1034', '1036', '1038', '1050', '1059', '1067', '1071', '1078', '1095', '1098',
'1104', '1110', '1112', '1117', '1122', '1131', '1132', '1152', '1157', '1163',
'1169', '1175', '1181', '1191', '1201', '1204', '1210', '1218', '1225', '1250',
'1258', '1261', '1288', '1289', '1290', '1291', '1292', '1293', '1294', '1295',
'1296', '1297', '1298', '1300', '1327', '1334', '1339', '1346', '1395', '1436',
'1444', '1469', '1682', '1687', '1689', '1701', '1715', '1727', '1751', '1771',
'1797', '1837', '1900', '1902', '1992', '2025', '2075', '2307', '2492', '2801',
'2842', '2911', '3404', '3870', '3871', '3874', '4086', '4195', '5217', '5249',
'5745', '5762', '5773', '5803', '5808', '5809', '5812', '5813', '5816', '5836',
'5841', '6008', '6073', '6087', '6104', '6605', '7934', '8127', '8663',
'10274', '10862', '12317', '12428', '12605', '12622', '12650', '12676',
'12677', '12756', '12904', '13242', '13609', '14722', '14812', '15367',
'15409', '15522', '15536', '15839', '15859', '16087', '16152', '16303',
'16386', '16387']

Related

sort function in python doesn't work for long list? [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 9 months ago.
Improve this question
a=['0', '0.05', '0.1', '0.15', '0.2', '0.25', '0.3', '0.35', '0.4', '0.45', '0.5', '0.55', '0.6', '0.65', '0.7', '0.75', '0.8', '0.85', '0.9', '0.95', '1', '1.05', '1.1', '1.15', '1.2', '1.25', '1.3', '1.35', '1.4', '1.45', '1.5', '1.55', '1.6', '1.65', '1.7', '1.75', '1.8', '1.85', '1.9', '1.95', '10', '10.05', '10.1', '10.15', '10.2', '10.25', '10.3', '10.35', '10.4', '10.45', '10.5', '10.55', '10.6', '10.65', '10.7', '10.75', '10.8', '10.85', '10.9', '10.95', '11', '11.05', '11.1', '11.15', '11.2', '11.25', '11.3', '11.35', '11.4', '11.45', '11.5', '11.55', '11.6', '11.65', '11.7', '11.75', '11.8', '11.85', '11.9', '11.95', '12', '12.05', '12.1', '12.15', '12.2', '12.25', '12.3', '12.35', '12.4', '12.45', '12.5', '12.55', '12.6', '12.65', '12.7', '12.75', '12.8', '12.85', '12.9', '12.95', '13', '13.05', '13.1', '13.15', '13.2', '13.25', '13.3', '13.35', '13.4', '13.45', '13.5', '13.55', '13.6', '13.65', '13.7', '13.75', '13.8', '13.85', '13.9', '13.95', '14', '14.05', '14.1', '14.15', '14.2', '14.25', '14.3', '14.35', '14.4', '14.45', '14.5', '14.55', '14.6', '14.65', '14.7', '14.75', '14.8', '14.85', '14.9', '14.95', '15', '15.05', '15.1', '15.15', '15.2', '15.25', '15.3', '15.35', '15.4', '15.45', '15.5', '15.55', '15.6', '15.65', '15.7', '15.75', '15.8', '15.85', '15.9', '15.95', '16', '16.05', '16.1', '16.15', '16.2', '16.25', '16.3', '16.35', '16.4', '16.45', '16.5', '16.55', '16.6', '16.65', '16.7', '16.75', '16.8', '16.85', '16.9', '16.95', '17', '17.05', '17.1', '17.15', '17.2', '17.25', '17.3', '17.35', '17.4', '17.45', '17.5', '17.55', '17.6', '17.65', '17.7', '17.75', '17.8', '17.85', '17.9', '17.95', '18', '18.05', '18.1', '18.15', '18.2', '18.25', '18.3', '18.35', '18.4', '18.45', '18.5', '18.55', '18.6', '18.65', '18.7', '18.75', '18.8', '18.85', '18.9', '18.95', '19', '19.05', '19.1', '19.15', '19.2', '19.25', '19.3', '19.35', '19.4', '19.45', '19.5', '19.55', '19.6', '19.65', '19.7', '19.75', '19.8', '19.85', '19.9', '19.95', '2', '2.05', '2.1', 
'2.15', '2.2', '2.25', '2.3', '2.35', '2.4', '2.45', '2.5', '2.55', '2.6', '2.65', '2.7', '2.75', '2.8', '2.85', '2.9', '2.95', '20', '20.05', '20.1', '20.15', '20.2', '20.25', '20.3', '20.35', '20.4', '20.45', '20.5', '20.55', '20.6', '20.65', '20.7', '20.75', '20.8', '20.85', '20.9', '20.95', '21', '21.05', '21.1', '21.15', '21.2', '21.25', '21.3', '21.35', '21.4', '21.45', '21.5', '21.55', '21.6', '21.65', '21.7', '21.75', '21.8', '21.85', '21.9', '21.95', '22', '22.05', '22.1', '22.15', '22.2', '22.25', '22.3', '22.35', '22.4', '22.45', '22.5', '22.55', '22.6', '22.65', '22.7', '22.75', '22.8', '22.85', '22.9', '22.95', '23', '23.05', '23.1', '23.15', '23.2', '23.25', '23.3', '23.35', '23.4', '23.45', '23.5', '23.55', '23.6', '23.65', '23.7', '23.75', '23.8', '23.85', '23.9', '23.95', '24', '24.05', '24.1', '24.15', '24.2', '24.25', '24.3', '24.35', '24.4', '24.45', '24.5', '24.55', '24.6', '24.65', '24.7', '24.75', '24.8', '24.85', '24.9', '24.95', '25', '25.05', '25.1', '25.15', '25.2', '25.25', '25.3', '25.35', '25.4', '25.45', '25.5']
a.sort()
print (a)
You need to convert your list to a list of floats rather than strings:
# Convert every entry to float and order the values numerically; sorted()
# builds the new list in one step.
a = sorted(map(float, a))
print(a)
Should work.
If you want to convert it back to str you can do:
a = [*map(str,a)]
OR, if you don't want trailing zeros:
a = [*map(lambda c: str(round(c,2)).rstrip('0').rstrip('.'), a)]
As pointed out by ShadowRanger, if you want to keep the values as str without ever converting the list to float, you can do:
a.sort(key = float)
Or
# sorted() is a built-in function, not a list method; it returns a new list
# and leaves the strings themselves untouched (float is only the sort key).
a = sorted(a, key=float)

Python Loop: string Index Out of Range

Given the following csv file:
['offre_bfr.entreprise', 'offre_bfr.nombreemp', 'offre_bfr.ca2020', 'offre_bfr.ca2019', 'offre_bfr.ca2018', 'offre_bfr.benefice2020', 'offre_bfr.benefice2019', 'offre_bfr.benefice2018', 'offre_bfr.tauxrenta2020', 'offre_bfr.tauxrenta2019', 'offre_bfr.tauxrenta2018', 'offre_bfr.tauximposition', 'offre_bfr.chargesalariale', 'offre_bfr.chargesfixes', 'offre_bfr.agedirigeant', 'offre_bfr.partdirigeant', 'offre_bfr.agemoyact', 'offre_bfr.parttotaleact', 'offre_bfr.mtdmdcred', 'offre_bfr.creditusuel', 'offre_bfr.capipropres', 'offre_bfr.dettefin', 'offre_bfr.dettenonfin', 'offre_bfr.stock', 'offre_bfr.creances', 'offre_bfr.actifimmobilise', 'offre_bfr.passiftotal', 'offre_bfr.tresorerie', 'offre_bfr.capitalisation2020', 'offre_bfr.capitalisation2019', 'offre_bfr.capitalisation2018', 'offre_bfr.nivrisque', 'offre_bfr.indconfiance', 'offre_bfr.indperseverance', 'offre_bfr.score']
['1', '15', '1.84', '5.18', '7.96', '0.48', '1.19', '0.11', '26.086956', '22.972973', '1.3819095', '17.9', '0.035295', '1.2', '55', '33', '69', '67', '10', '14.98', '0.05', '0.04', '0.21', '0.1', '0.08', '0.41', '0.8', '0.0', '7.5', '52.8', '0.16', 'Bas', '4', '4', '5.0']
['3', '3030', '546.7', '589.7', '430.9', '62.58', '20.63', '99.06', '11.446863', '3.498389', '22.989092', '17.4', '7.12959', '270.9', '46', '37', '69', '73', '2973', '1567.3', '46.97', '13.39', '61.92', '3.0', '8.0', '145.0', '278.4', '-51.0', '1063.5', '3047.8', '538.08', 'Eleve', '4', '4', '3.0']
['4', '42', '4.28', '9.13', '8.99', '0.45', '0.59', '0.08', '10.514019', '6.4622126', '0.8898776', '31.5', '0.098826', '2.2', '70', '32', '53', '68', '9', '22.4', '0.13', '0.06', '0.31', '0.1', '0.07', '0.92', '1.7', '-0.3', '42.5', '69.5', '2.73', 'Eleve', '4', '4', '3.0']
['5', '497', '92.2', '62.5', '40.3', '20.14', '6.91', '4.92', '21.843819', '11.056', '12.208437', '32.2', '1.169441', '5.1', '64', '32', '70', '68', '197', '195.0', '6.07', '1.83', '12.49', '5.9', '3.83', '16.41', '16.5', '-2.7', '1048.3', '618.8', '11.24', 'Moyen', '4', '4', '4.0']
['8', '122', '67.8', '24.5', '91.4', '12.67', '5.69', '8.43', '18.687315', '23.22449', '9.223195', '24.8', '0.287066', '19.5', '53', '35', '61', '65', '424', '183.7', '1.64', '1.92', '6.48', '4.9', '2.45', '23.6', '23.7', '-3.5', '204.2', '109.5', '5.33', 'Eleve', '4', '4', '3.0']
['11', '310', '77.5', '78.7', '24.9', '8.05', '21.76', '1.79', '10.387096', '27.649302', '7.188755', '29.0', '0.72943', '12.0', '47', '32', '65', '68', '38', '181.1', '6.55', '3.27', '8.16', '5.1', '2.08', '15.09', '36.3', '-7.0', '669.8', '705.3', '22.95', 'Eleve', '4', '4', '3.0']
['14', '283', '91.9', '52.9', '51.9', '10.48', '7.01', '12.57', '11.4037', '13.251418', '24.219654', '24.2', '0.665899', '2.3', '61', '29', '58', '71', '60', '196.7', '8.02', '2.93', '7.79', '7.0', '3.87', '25.1', '42.7', '-4.4', '434.0', '143.4', '17.18', 'Eleve', '4', '4', '3.0']
['16', '41', '5.54', '6.48', '5.5', '1.55', '1.51', '0.73', '27.97834', '23.30247', '13.272727', '15.9', '0.096473', '2.4', '71', '39', '56', '61', '29', '17.52', '0.41', '0.11', '0.62', '0.3', '0.17', '1.47', '2.4', '0.0', '36.7', '76.0', '4.2', 'Bas', '4', '4', '5.0']
I would like to create a bar chart from columns 0 and 34 of the csv file.
Here is the python script I am running:
# -*-coding:Latin-1 -*
#!/usr/bin/python
#!/usr/bin/env python
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import csv
x = []
y = []
Bfr = csv.reader(open('/home/cloudera/PMGE/Bfr.csv​'))
linesBfr = list(Bfr)
i=1
for l in linesBfr:
x.append(l[i][0])
y.append(int(l[i][34]))
plt.bar(x, y, color = 'g', width = 0.72, label = "Score")
plt.xlabel('Entreprise')
plt.ylabel('Scores')
plt.title('Scores des entreprises en BFR')
plt.legend()
plt.show()
But I'm getting the following error:
Traceback (most recent call last):
File "barplot.py", line 20, in <module>
y.append(int(l[i][34]))
IndexError: string index out of range
Can someone help me out?
Python lists are zero-indexed. You are trying to iterate to the 35th element in a 34 element list.
Firstly, there are 35 elements from 0 to 34. This means that starting your indexing i at i=1 will look for an element at the 35th index, which does not exist, or be an "index out of range". To be more specific, your code is looking for a list that does not exist. Secondly, this is not the standard way to use 2d lists in python. I suggest using a method more as such:
https://www.kite.com/python/answers/how-to-append-to-a-2d-list-in-python#:~:text=Append%20a%20list%20to%20a,list%20to%20the%202D%20list.
Hope this was helpful.
You probably meant to write this:
x = []
y = []
# The with-block closes the file as soon as every row has been read.
with open('/home/cloudera/PMGE/Bfr.csv') as csv_file:
    Bfr = csv.reader(csv_file)
    next(Bfr, None)  # skip the header
    for row in Bfr:
        # Each row is already a list of column strings, so index it directly.
        x.append(int(row[0]))
        # The score column holds values such as '5.0', which int() rejects
        # with a ValueError; parse it as a float instead.
        y.append(float(row[34]))
...
(See this question about skipping the header of a csv)

Combining two dictionaries into one if the value from the one dictionary exist in another

I am trying to construct a dictionary called author_venues in which author names are the keys and values are the list of venues where they have published.
I was given two dictionaries:
A sample author_pubs dictionary where author name is the key and a list of publication ids is the value
defaultdict(list,
{'José A. Blakeley': ['2',
'25',
'2018',
'2185',
'94602',
'145114',
'182779',
'182780',
'299422',
'299426',
'299428',
'299558',
'302125',
'511816',
'521294',
'597967',
'598123',
'598125',
'598130',
'598132',
'598134',
'598136',
'598620',
'600180',
'600221',
'642049',
'643606',
'808458',
'832249',
'938531',
'939047',
'1064640',
'1064641',
'1065929',
'1118153',
'1269074',
'2984279',
'3154713',
'3169639',
'3286099',
'3494140'],
'Yuri Breitbart': ['3',
'4',
'76914',
'113875',
'140847',
'147900',
'147901',
'150951',
'176221',
'176896',
'182963',
'200336',
'262940',
'285098',
'285564',
'299526',
'301313',
'303418',
'304160',
'400040',
'400041',
'400174',
'400175',
'402178',
'482506',
'482785',
'544757',
'545233',
'545429',
'559737',
'559761',
'559765',
'559783',
'559785',
'597889',
'598201',
'598202',
'598203',
'599325',
'599899',
'620806',
'636455',
'641884',
'642157',
'654200',
'654201',
'740600',
'740602',
'833336',
'844280',
'856032',
'856222',
'888870',
'934979',
'938228',
'941484',
'945339',
'949548',
'971592',
'971593',
'972813',
'972958',
'1064100',
'1064690',
'1064691',
'1064693',
'1064694',
'1078369',
'1078370',
'1089675',
'1095084',
'1121956',
'1122006',
'1122610',
'1127610',
'1138059',
'1138061',
'1141938',
'1227365',
'1278703',
'1319498',
'2818906',
'2876867',
'2978458',
'3015058',
'3223418'],
A sample venue_pubs dictionary where venue name is the key and a list of publication ids is the value
defaultdict(list,
{'Modern Database Systems': ['2',
'3',
'4',
'5',
'6',
'7',
'8',
'9',
'10',
'11',
'12',
'13',
'14',
'15',
'16',
'17',
'18',
'19',
'20',
'21',
'22',
'23',
'24',
'25',
'26',
'27',
'28',
'29',
'30',
'31',
'32',
'33',
'34',
'1203459',
'3000615',
'3000616',
'3000617',
'3000618',
'3000619',
'3000620',
'3000621',
'3000622',
'3000623',
'3000624',
'3000625',
'3000626'],
'Object-Oriented Concepts, Databases, and Applications': ['36',
'37',
'38',
'39',
'40',
'41',
'42',
'43',
'44',
'45',
'46',
'47',
'48',
'49',
'50',
'51',
'52',
'53',
'54',
'55',
'56',
'57',
'58',
'59'],
'The INGRES Papers': ['60',
'61',
'62',
'63',
'64',
'65',
'66',
'67',
'68',
'69'],
'Temporal Databases': ['168',
'169',
'170',
'171',
'172',
'173',
'174',
'175',
'176',
'177',
'178',
'179',
'180',
'181',
'182',
'183',
'184',
'185',
'186',
'187',
'188',
'189',
'190',
'627582',
'627584',
'627588',
'627589',
'627591',
'627592',
'627593',
'627594',
'627596',
'627600',
'627601',
'627602',
'627603',
'627604',
'627605',
'627608',
'627613',
'627615',
'627616',
'627617'],
The resulting dictionary should look like {'author':['venue1','venue2','venue3']}
author_venue = defaultdict(list)
This is code I wrote:
for k,v in author_pubs.items():
for item in v:
for x,y in venue_pubs.items():
if item in y:
venue = x
author_venue[k].append(venue)
But this loop takes forever since I have over 3 million records.
Please help!
You can "invert" the dictionary venue_pubs to speed up the search:
from collections import defaultdict

author_pubs = {
    "author1": [1, 2, 3],
    "author2": [3, 4, 5],
}
venue_pubs = {
    "xxx1": [1, 4, 20],
    "xxx2": [4, 30, 40],
}

# "Invert" dictionary `venue_pubs`: map each publication id to the venues
# that list it, so finding the venues of a publication is a single dictionary
# lookup instead of a scan over every venue's list.
venues_by_pub = defaultdict(list)
for venue, pub_ids in venue_pubs.items():
    for pub_id in pub_ids:
        venues_by_pub[pub_id].append(venue)

author_venue = defaultdict(list)
for author, pub_ids in author_pubs.items():
    for pub_id in pub_ids:
        # Test membership instead of indexing: indexing a defaultdict would
        # insert an empty entry for every unknown publication id, and an
        # author without any known venue must not get a key at all.
        if pub_id in venues_by_pub:
            author_venue[author].extend(venues_by_pub[pub_id])

print(author_venue)
Prints:
defaultdict(<class 'list'>, {'author1': ['xxx1'], 'author2': ['xxx1', 'xxx2']})
EDIT: To remove duplicates:
# ...
for k in author_venue:
    # dict.fromkeys keeps only the first occurrence of each venue and, unlike
    # set(), preserves the order in which the venues were collected.
    # Reassigning the value of an existing key is safe while iterating.
    author_venue[k] = list(dict.fromkeys(author_venue[k]))
print(author_venue)

python loop through list with multiple list

I have a list - scraped & cleaned from an HTML data table
['8', '1', 'X', '308', '134', '157', '46', '237', '107', '58', '843', '137', '26', '549', '---', '---', '---', '---']
['79', '2', '341', 'X', '401', '1148', '687', '1619', '1604', '674', '2504', '1666', '257', '3154', '---', '---', '---', '---']
['18', '3', '132', '356', 'X', '241', '153', '536', '258', '174', '1293', '348', '67', '1056', '---', '---', '---', '---']
['12', '4', '163', '891', '241', 'X', '112', '508', '227', '154', '1481', '321', '54', '747', '---', '---', '---', '---']
['9/2', '5', '39', '370', '120', '90', 'X', '116', '75', '31', '485', '79', '15', '285', '---', '---', '---', '---']
Each [ ] represents a row of data that I want to save into a db table. Now, how do I loop through each [ ] and treat it as if it were a new row? If I do a for loop, it doesn’t seem to pick up that each new [ ] is a new data row. I also imagine I have to split each row so that I can properly save the data into the specific db column.
Based on the code from your comment:
# Collect the table as a list of rows, each row being a list of cell texts.
# NOTE(review): `table` is presumably a BeautifulSoup tag for the scraped
# <table> element, created earlier in the script - confirm against the caller.
data_list = []
rows = table.tbody.find_all("tr")
for row in rows:
    cells = row.find_all('td')
    # Strip surrounding whitespace so only the cell value itself is stored.
    data_list.append([cell.text.strip() for cell in cells])
Now use nested for loops to get each item of each inner list, like this:
# The outer loop yields one table row (a list) at a time; the inner loop
# visits the individual cell values of that row.
for li in data_list:
    for data in li:
        # Call syntax works on Python 3; with a single argument it also
        # prints the same text on Python 2.
        print(data)

Index error for list in python

I've written the following code to iterate through folders and their files and to rename each file in each folder to the file's index in the folder. E.g., the first file in each folder will be named 1.JPG, the second 2.JPG, and so on. The folder names are integers from 1 to 82. I need the folder name to specify the path in os.rename() and was planning to obtain it from the dirs list, because os.walk(path) does not traverse the directories in order.
Code:
import os
import sys
path='/home/srilatha/Desktop/Research_intern/Data_sets/Final'
i=0
for root, dirs, files in os.walk(path):
print(dirs)
print(dirs[i])
#folder_name=dirs[0]
#print(folder_name)
j=0
for name in sorted(files):
j+=1
#print('j=')
#print(j)
print(name)
new=str(j)
new_name=new+'.JPG'
print(new_name)
#os.rename(name,new_name)
i+=1
Error Message:
/usr/bin/python3.4 /home/srilatha/PycharmProjects/Research_Intern/Sort_images_into_folders.py
['9', '43', '78', '7', '51', '15', '4', '68', '48', '67', '27', '16', '55', '20', '57', '38', '47', '18', '77', '82', '12', '65', '25', '59', '49', '30', '36', '79', '71', '17', '22', '42', '40', '73', '19', '24', '10', '37', '32', '3', '64', '62', '58', '13', '72', '2', '14', '70', '11', '66', '69', '50', '54', '34', '5', '52', '81', '26', '39', '60', '1', '56', '33', '80', '23', '53', '44', '45', '29', '41', '28', '35', '6', '46', '31', '8', '63', '75', '61', '76', '74', '21']
9
[]
Traceback (most recent call last):
File "/home/srilatha/PycharmProjects/Research_Intern/Sort_images_into_folders.py", line 9, in <module>
print(dirs[i])
IndexError: list index out of range
I assume you want something like this?
# Import the os module, for the os.walk function
import os
# Set the directory you want to start from
rootDir = '/Users/heinst'
# os.walk yields one (directory, subdirectories, files) triple per directory,
# so the file counter restarts at 0 inside every directory.
for dirName, subdirList, fileList in os.walk(rootDir):
    print('Found directory: %s' % dirName)
    for i, fname in enumerate(fileList):
        # Show the proposed new name: the file's index within its directory
        # followed by the file's original extension.
        print('\t{0} -> {1}'.format(fname, str(i) + os.path.splitext(fname)[1]))

Categories

Resources