I have text files that contain SQL queries. After running one file, "tb_exec_ns_call_pln.txt", I get two dates, for example 2018-12-29 and 2019-03-29.
I want to pass these dates into the query stored in another text file (tb_exec_ns_call_actvty.txt) using Python. That text file contains the query below:
SELECT a.nm as cycle_nm,
a.start_dt as cycle_start_dt,
a.end_dt as cycle_end_dt,
a.terr as territory,sales_drctn,
x_rating1,
c.jnj_id as jnj_id,
c.prsn_first_nm,
c.prsn_last_nm,
plnnd_calls as rep_goal
FROM eureka.cycle_plan a, eureka.cycle_plan_trgt b, eureka.acct c
WHERE
a.id = b.cycle_plan
and b.acct = c.id
and b.del_flg = 'N'
***and start_dt >= '2018-12-29'***
***and end_dt <= '2019-03-29'***
and substring(a.terr,1,6) in ('106-KS','106-PI','106-VO')
and a.status = 'In_Progress_vod'
and a.del_flg = 'N'
and c.del_flg = 'N' and plnnd_calls > 0
I have also written a Python script. Please guide me on how to pass the values.
path = "D:/Users/SPate233/Downloads/NS dashboard/tb_exec_ns_call_pln.txt"
# Read the query through a context manager so the file handle is closed
# as soon as the text has been loaded.
with open(path, 'r') as sql_query_file:
    sql_query1 = sql_query_file.read()
cur.execute(sql_query1)
res = cur.fetchall()
print(res)
print(type(res))
# Print the values at positions 1 and 2 of every fetched row.
for val in res:
    print(val[1])
    print(val[2])
One approach is to hard-code placeholder strings in tb_exec_ns_call_actvty.txt and then use str.replace to fill in the required values.
Ex:
SELECT a.nm as cycle_nm,
a.start_dt as cycle_start_dt,
a.end_dt as cycle_end_dt,
a.terr as territory,sales_drctn,
x_rating1,
c.jnj_id as jnj_id,
c.prsn_first_nm,
c.prsn_last_nm,
plnnd_calls as rep_goal
FROM eureka.cycle_plan a, eureka.cycle_plan_trgt b, eureka.acct c
WHERE
a.id = b.cycle_plan
and b.acct = c.id
and b.del_flg = 'N'
and start_dt >= 'START_DT'
and end_dt <= 'END_DT'
and substring(a.terr,1,6) in ('106-KS','106-PI','106-VO')
and a.status = 'In_Progress_vod'
and a.del_flg = 'N'
and c.del_flg = 'N' and plnnd_calls > 0
InCode:
# The START_DT / END_DT placeholders live in the activity query file, so that
# is the file to read and fill in (not the plan file that produced the dates).
path = "D:/Users/SPate233/Downloads/NS dashboard/tb_exec_ns_call_actvty.txt"
with open(path) as sql_query_file:
    sql_query1 = sql_query_file.read()
# Substitute the two dates obtained from the first query for the placeholders.
sql_query1 = sql_query1.replace("START_DT", '2018-12-29').replace("END_DT", '2019-03-29')
cur.execute(sql_query1)
res = cur.fetchall()
Related
I created my first API where I can get data from my tables in Bigquery.
I can get all the data I need based on the 2 inputs below, but I am also trying to get the whole table when the inputs are empty, which I cannot do.
Thanks for your help
@app.route("/tracking", methods=['GET'])
def tracking_data():
    """Return the tracking stats for one haulier and one month as parsed JSON.

    Reads the ``haulier_id_tracking`` and ``month_tracking`` query-string
    arguments and filters ``mart.monthly_vehicle_stats`` on both of them.
    """
    haulier_id_tracking = request.args.get('haulier_id_tracking')
    month_tracking = request.args.get('month_tracking')
    # NOTE(review): the request values are formatted straight into the SQL
    # text; binding them as query parameters would be the safe alternative.
    query_job = bq_client.query("""
        WITH t AS (
            SELECT *
            FROM mart.monthly_vehicle_stats
            WHERE dt_fr_month = '{month_tracking}-01' AND (haulier_id_tracking = '{haulier_id_tracking}')
        )
        SELECT TO_JSON_STRING(STRUCT(ARRAY_AGG(STRUCT(dt_fr_month, haulier_id_tracking, vehicle_id , nb_days_tracked,
            data_access, date_first_camp, invoiced)) AS data)) json
        FROM t
    """.format(month_tracking = month_tracking, haulier_id_tracking = haulier_id_tracking))
    # Return the parsed JSON of the first result row.
    for row in query_job:
        return json.loads(row["json"])
You can remove the where clause when the input is empty like this:
@app.route("/tracking", methods=['GET'])
def tracking_data():
    """Return tracking stats as parsed JSON, optionally filtered.

    When both ``haulier_id_tracking`` and ``month_tracking`` are supplied in
    the query string, the table is filtered on them; when either one is
    missing or empty, the whole table is returned.
    """
    haulier_id_tracking = request.args.get('haulier_id_tracking')
    month_tracking = request.args.get('month_tracking')
    where_clause = ''
    # A missing argument comes back as None (not ''), so test truthiness to
    # cover both "absent" and "present but empty".
    if haulier_id_tracking and month_tracking:
        # NOTE(review): the request values are formatted straight into the SQL
        # text; binding them as query parameters would be the safe alternative.
        where_clause = (
            f"WHERE dt_fr_month = '{month_tracking}-01' "
            f"AND haulier_id_tracking = '{haulier_id_tracking}'"
        )
    query_job = bq_client.query(f"""
        WITH t AS (
            SELECT * FROM mart.monthly_vehicle_stats {where_clause})
        SELECT TO_JSON_STRING(STRUCT(ARRAY_AGG(STRUCT(dt_fr_month, haulier_id_tracking, vehicle_id , nb_days_tracked,
            data_access, date_first_camp, invoiced)) AS data)) json
        FROM t
    """)
    # Return the parsed JSON of the first result row.
    for row in query_job:
        return json.loads(row["json"])
I'd like to build an AND clause that combines two lists of OR conditions on the same table.
The problem with the following code is that the query result is empty. If I select only 'indices' or only 'brokers', the result is fine.
...
query = query.join(StockGroupTicker, on=(Ticker.id == StockGroupTicker.ticker))
# indices
if "indices" in filter:
where_indices = []
for f in filter["indices"]:
where_indices.append(StockGroupTicker.stock_index == int(f))
if len(where_indices):
query = query.where(peewee.reduce(peewee.operator.or_, where_indices))
# broker
if "brokers" in filter:
where_broker = []
for f in filter["brokers"]:
where_broker.append(StockGroupTicker.stock_index == int(f))
if len(where_broker):
query = query.where(peewee.reduce(peewee.operator.or_, where_broker))
return query.distinct()
SQL query (update)
# index and broker
SELECT
DISTINCT `t1`.`id`,
`t1`.`symbol`,
`t1`.`type`,
`t1`.`name`,
`t1`.`sector`,
`t1`.`region`,
`t1`.`primary_exchange`,
`t1`.`currency`,
`t1`.`score`,
`t1`.`last_price`,
`t1`.`last_price_date`,
`t1`.`last_price_check`,
`t1`.`last_stock_split`,
`t1`.`next_earning`,
`t1`.`last_earnings_update`,
`t1`.`disused`,
`t1`.`source`,
`t1`.`source_intraday`,
`t1`.`created`,
`t1`.`modified`,
`t2`.`invest_score` AS `invest_score`
FROM
`ticker` AS `t1`
INNER JOIN `tickerstats` AS `t2` ON
(`t1`.`id` = `t2`.`ticker_id`)
INNER JOIN `stockgroupticker` AS `t3` ON
(`t1`.`id` = `t3`.`ticker_id`)
WHERE
(((((`t1`.`disused` IS NULL)
OR (`t1`.`disused` = 0))
AND (`t2`.`volume_mean_5` > 10000.0))
AND (`t3`.`stock_index_id` = 1))
AND (`t3`.`stock_index_id` = 10)
)
Thanks to @coleifer, the peewee solution is quite simple: I had to use an alias.
if "indices" in filter and filter["indices"]:
query = query.join(
StockGroupTicker, peewee.JOIN.INNER, on=(Ticker.id == StockGroupTicker.ticker)
)
where_indices = []
for f in filter["indices"]:
where_indices.append(StockGroupTicker.stock_index == int(f))
if len(where_indices):
query = query.where(peewee.reduce(peewee.operator.or_, where_indices))
if "brokers" in filter and filter["brokers"]:
BrokerGroupTicker = StockGroupTicker.alias()
query = query.join(
BrokerGroupTicker, peewee.JOIN.INNER, on=(Ticker.id == BrokerGroupTicker.ticker)
)
where_broker = []
for f in filter["brokers"]:
where_broker.append(BrokerGroupTicker.stock_index == int(f))
if len(where_broker):
query = query.where(peewee.reduce(peewee.operator.or_, where_broker))
return query.distinct()
This is so weird: I am using an if statement together with the combobox selection to run a specific command. When the combobox value is set to 'full_name' (which belongs to the elif branch), I get a messagebox that is supposed to be shown only when the first if branch is executed. According to the conditions the elif part should run, but the if part runs instead. Is there a mistake in my code? If the question is unclear, please refer to the code or let me know :) Thanks in advance.
CODE:
def sp_patient():
# Builds the 'Choose Patient' window: a search-by combobox, an entry box and a
# Search button whose command is the nested search() function.
#Creating window
sp_pat = Toplevel(update)
sp_pat.title('Choose Patient')
def search():
# Search-button callback: reads the combobox value and queries patient_infos
# on the column of that name.
#Assigning variable to .get()
a = drops.get()
# NOTE(review): only the first operand is compared with a; every following
# operand is a non-empty string literal, which is truthy on its own, so this
# condition holds for every selection.
if a == 'id' or 'emirate_id' or 'email_adress' or 'gender' or 'DOB' or 'blood_grp' or 'COVID_test':
#Establishing connection
con = mysql.connect(host='***', user='nihaalnz',
password='****', database='nihaalnztrying')
# Making SQL command
c = con.cursor()
# NOTE(review): the column name and the entry text are both formatted straight
# into the SQL string.
c.execute(f"SELECT * FROM patient_infos where `{a}` = '{e_1.get()}'")
# Executing and saving SQL command
records = c.fetchall()
if records == []:
messagebox.showinfo('Does not exist!','Sorry such patient does not exist')
else:
#Creating window
result_win = Toplevel(sp_pat)
result_win.title('Search result')
index=0
for index,x in enumerate(records):
num=0
for y in x:
lookup_label = Label(result_win,text=y)
lookup_label.grid(row=index+1,column=num)
num += 1
#Closing connection
con.close()
#Creating column header and exit button
l_1 = Label(result_win,text='ID',font=font_text)
l_2 = Label(result_win,text='Full Name',font=font_text)
l_3 = Label(result_win,text='Phone no.',font=font_text)
l_4 = Label(result_win,text='Emirates ID',font=font_text)
l_5 = Label(result_win,text='Email addr.',font=font_text)
l_6 = Label(result_win,text='Gender',font=font_text)
l_7 = Label(result_win,text='DOB',font=font_text)
l_8 = Label(result_win,text='Nationality',font=font_text)
l_9 = Label(result_win,text='Blood group',font=font_text)
l_10 = Label(result_win,text='COVID test',font=font_text)
l_11 = Label(result_win,text='Emergency no.',font=font_text)
btn_ext = Button(result_win,text='Exit',font=font_text,command=result_win.destroy,borderwidth=2,fg='#eb4d4b')
#Placing it in screen
l_1.grid(row=0,column=0,padx=20)
l_2.grid(row=0,column=1,padx=20)
l_3.grid(row=0,column=2,padx=20)
l_4.grid(row=0,column=3,padx=20)
l_5.grid(row=0,column=4,padx=20)
l_6.grid(row=0,column=5,padx=20)
l_7.grid(row=0,column=6,padx=20)
l_8.grid(row=0,column=7,padx=20)
l_9.grid(row=0,column=8,padx=20)
l_10.grid(row=0,column=9,padx=20)
l_11.grid(row=0,column=10,padx=20)
btn_ext.grid(row=index+2,columnspan=11,ipadx=240,sticky=E+W)
# NOTE(review): not reached while the if condition above is always true; this
# condition has the same "a == x or 'literal'" shape.
elif a == 'full_name' or 'ph_no' or 'nationality' or 'emergency_no':
#Creating window
result_win = Toplevel(sp_pat)
result_win.title('Search result')
#Establishing connection
con = mysql.connect(host='****', user='nihaalnz',
password='*****', database='nihaalnztrying')
# Making SQL command
c = con.cursor()
# NOTE(review): same direct string formatting as above, here with a regexp match.
c.execute(f"SELECT * FROM patient_infos where `{a}` regexp '{e_1.get()}'")
# Executing and saving SQL command
records = c.fetchall()
index=0
for index,x in enumerate(records):
num=0
for y in x:
lookup_label = Label(result_win,text=y)
lookup_label.grid(row=index+1,column=num)
num += 1
#Closing connection
con.close()
#Creating column headers and exit button
l_1 = Label(result_win,text='ID',font=font_text)
l_2 = Label(result_win,text='Full Name',font=font_text)
l_3 = Label(result_win,text='Phone no.',font=font_text)
l_4 = Label(result_win,text='Emirates ID',font=font_text)
l_5 = Label(result_win,text='Email addr.',font=font_text)
l_6 = Label(result_win,text='Gender',font=font_text)
l_7 = Label(result_win,text='DOB',font=font_text)
l_8 = Label(result_win,text='Nationality',font=font_text)
l_9 = Label(result_win,text='Blood group',font=font_text)
l_10 = Label(result_win,text='COVID test',font=font_text)
l_11 = Label(result_win,text='Emergency no.',font=font_text)
btn_ext = Button(result_win,text='Exit',font=font_text,command=result_win.destroy,borderwidth=2,fg='#eb4d4b')
#Placing it on screen
l_1.grid(row=0,column=0,padx=20)
l_2.grid(row=0,column=1,padx=20)
l_3.grid(row=0,column=2,padx=20)
l_4.grid(row=0,column=3,padx=20)
l_5.grid(row=0,column=4,padx=20)
l_6.grid(row=0,column=5,padx=20)
l_7.grid(row=0,column=6,padx=20)
l_8.grid(row=0,column=7,padx=20)
l_9.grid(row=0,column=8,padx=20)
l_10.grid(row=0,column=9,padx=20)
l_11.grid(row=0,column=10,padx=20)
btn_ext.grid(row=index+2,columnspan=11,ipadx=240,sticky=E+W)
# NOTE(review): also not reached while the first condition is always true.
elif a == 'Search by...':
#Error message
messagebox.showinfo('No choice given','Please choose a valid option to search by...')
#Defining dropdown and entry box
# NOTE(review): this list offers 'email_addr', while the if chain in search()
# tests the spelling 'email_adress'.
drops = ttk.Combobox(sp_pat,value=['Search by...','id','full_name','ph_no','emirate_id','email_addr','gender','DOB','nationality','blood_grp','COVID_test','emergency_no'],state='readonly')
# NOTE(review): this print runs before drops.current(0) below selects the
# default entry.
print(drops.get())
drops.current(0)
e_1 = Entry(sp_pat)
#Defining Labels and search button
l_sch = Label(sp_pat,text='Search',font=Font(size='20'))
l_id = Label(sp_pat,text='Enter',font=font_text)
bt_db = Button(sp_pat,text='Search',command=search)
#Placing it in screen
drops.grid(row=1,columnspan=3,ipady=5,padx=5,pady=10)
e_1.grid(row=2,column=1,ipady=5,padx=5,pady=5)
l_id.grid(row=2,column=0,padx=5,pady=5)
bt_db.grid(row=3,columnspan=2,padx=5,pady=5,sticky=E+W)
l_sch.grid(row=0,columnspan=2,sticky=E+W,padx=10,pady=10)
The problem is this line:
if a == 'id' or 'emirate_id' or...
This condition is always true. Python evaluates it as (a == 'id') or 'emirate_id' or ..., and a non-empty string such as 'emirate_id' is always truthy.
You can be explicit and use:
if a == 'id' or a == 'emirate_id' or ...
Or better yet, use keyword in:
if a in ("id", "emirate_id",...)
I have to construct a dynamic update query for PostgreSQL.
It's dynamic because I have to determine beforehand which columns to update.
Given a sample table:
create table foo (id int, a int, b int, c int)
Then I will construct programmatically the "set" clause
_set = {}
_set['a'] = 10
_set['c'] = NULL
After that I have to build the update query. And here I'm stuck.
I have to construct this sql Update command:
update foo set a = 10, c = NULL where id = 1
How to do this with the psycopg2 parametrized command? (i.e. looping through the dict if it is not empty and build the set clause) ?
UPDATE
While I was sleeping I found the solution myself. It is dynamic, exactly how I wanted it to be :-)
create table foo (id integer, a integer, b integer, c varchar)
updates = {}
updates['a'] = 10
updates['b'] = None
updates['c'] = 'blah blah blah'
# Only the column names (keys of our own dict) are formatted into the SQL
# text; every value, including the row id, is passed as a bound parameter so
# the driver handles quoting and NULL.
sql = "update foo set %s where id = %%s" % ', '.join("%s = %%s" % u for u in updates)
# A real list: psycopg2 needs a sequence, and dict.values() is only a view on
# Python 3. The trailing 10 is the id for the WHERE clause.
params = list(updates.values()) + [10]
print(cur.mogrify(sql, params))
cur.execute(sql, params)
And the result is what and how I needed (especially the nullable and quotable columns):
"update foo set a = 10, c = 'blah blah blah', b = NULL where id = 10"
There is actually a slightly cleaner way to make it, using the alternative column-list syntax:
# Column-list form: SET (a, b, c) = (<tuple of values>). The column names come
# from the keys of `updates`; the value tuple and the id are bound parameters.
sql_template = "UPDATE foo SET ({}) = %s WHERE id = %s"
sql = sql_template.format(', '.join(updates))
# psycopg2 adapts a Python tuple to a parenthesised SQL value list.
params = (tuple(updates.values()), 10)
print(cur.mogrify(sql, params))
cur.execute(sql, params)
Using psycopg2.sql – SQL string composition module
The module contains objects and functions useful to generate SQL dynamically, in a convenient and safe way.
from psycopg2 import connect, sql
conn = connect("dbname=test user=postgres")

upd = {'name': 'Peter', 'age': 35, 'city': 'London'}
ref_id = 12

# One `"column" = %(column)s` fragment per key of upd; the identifier is
# quoted by sql.Identifier and the value is left as a named placeholder.
assignments = [
    sql.Composed([sql.Identifier(column), sql.SQL(" = "), sql.Placeholder(column)])
    for column in upd.keys()
]
sql_query = sql.SQL("UPDATE people SET {data} WHERE id = {id}").format(
    data=sql.SQL(', ').join(assignments),
    id=sql.Placeholder('id'),
)

# The same dict feeds both the SET values and the WHERE id.
upd['id'] = ref_id

with conn:
    with conn.cursor() as cur:
        cur.execute(sql_query, upd)

conn.close()
Running print(sql_query.as_string(conn)) before closing connection will reveal this output:
UPDATE people SET "name" = %(name)s, "age" = %(age)s, "city" = %(city)s WHERE id = %(id)s
No need for dynamic SQL. Supposing a is not nullable and b is nullable.
If you want to update both a and b:
# b_update selects which element of array[b, %(b)s] is written back:
# 0 keeps the current b, 1 takes the new value.
_set = {
    'id': 1,
    'a': 10,
    'b': 20,
    'b_update': 1,
}

update = """
update foo
set
a = coalesce(%(a)s, a), -- a is not nullable
b = (array[b, %(b)s])[%(b_update)s + 1] -- b is nullable
where id = %(id)s
"""

# Show the statement as the driver will send it, then run it.
rendered = cur.mogrify(update, _set)
print(rendered)
cur.execute(update, _set)
Output:
update foo
set
a = coalesce(10, a), -- a is not nullable
b = (array[b, 20])[1 + 1] -- b is nullable
where id = 1
If you want to update none:
_set = dict(
id = 1,
a = None,
b = 20, b_update = 0
)
Output:
update foo
set
a = coalesce(NULL, a), -- a is not nullable
b = (array[b, 20])[0 + 1] -- b is nullable
where id = 1
An option without python format using psycopg2's AsIs function for column names (although that doesn't prevent you from SQL injection over column names). Dict is named data:
# Column names are spliced in verbatim through AsIs; the values travel as one
# bound tuple and the id as a separate bound parameter.
update_statement = 'UPDATE foo SET (%s) = %s WHERE id_column=%s'
columns = data.keys()
values = [data[name] for name in columns]
query_args = (AsIs(','.join(columns)), tuple(values), id_value)
query = cur.mogrify(update_statement, query_args)
Here's my solution that I have within a generic DatabaseHandler class that provides a lot of flexibility when using pd.DataFrame as your source.
def update_data(
    self,
    table: str,
    df: pd.DataFrame,
    indexes: Optional[list] = None,
    column_map: Optional[dict] = None,
    commit: Optional[bool] = False,
) -> int:
    """Update rows of ``table`` from ``df``, issuing one UPDATE per dataframe row.

    Args:
        table (str): the "tablename" or "namespace.tablename"
        df (pandas.DataFrame): dataframe containing the data to update
        indexes (list): the columns that go into the WHERE clause of each
            UPDATE. If not provided, the names of the dataframe index are used.
        column_map (dict): dictionary mapping the columns in df to the columns
            in the table. Key = df column. Value = table column.
            Mapped columns that are also in ``indexes`` are not updated.
        commit (bool): if True, the transaction is committed (default=False)

    Notes:
        If using a column_map, only the columns in the column_map are updated
        or used as indexes. Order does not matter. If not using a column_map,
        all columns in df must exist in table.

        Table and column names are formatted into the SQL text as-is, so they
        must come from trusted code; only the values are rendered through
        ``cursor.mogrify``.

    Returns:
        int: the cursor's ``rowcount`` after executing the batch, or 0 when
        ``df`` has no rows (nothing is executed in that case).

    Raises:
        Exception: if ``indexes`` is not given and the dataframe index has an
            unnamed level. Any exception raised inside is re-raised after
            ``self.rollback()`` has been called.
    """
    try:
        if not indexes:
            # Use the dataframe index instead
            indexes = []
            for name in df.index.names:
                if not name:
                    raise Exception(
                        "Dataframe contains indexes without names. Unable to determine update where clause."
                    )
                indexes.append(name)

        # Turn index levels into ordinary columns so rows can be read uniformly.
        tdf = df.reset_index()
        if column_map:
            target_columns = [c for c in column_map if c not in indexes]
        else:
            column_map = {c: c for c in tdf.columns}
            target_columns = [c for c in df.columns if c not in indexes]

        # No rows means no statements; executing an empty string would fail.
        if len(tdf) == 0:
            return 0

        # The placeholder templates are the same for every row: build them once.
        upd_params = ", ".join(f"{column_map[c]} = %s" for c in target_columns)
        idx_params = " AND ".join(f"{column_map[c]} = %s" for c in indexes)

        update_strings = []
        for _, r in tdf.iterrows():
            # Missing values (NaN/NaT/None) are sent as SQL NULL.
            upd_list = [r[c] if pd.notna(r[c]) else None for c in target_columns]
            upd_str = self._cur.mogrify(upd_params, upd_list).decode("utf-8")

            idx_list = [r[c] if pd.notna(r[c]) else None for c in indexes]
            idx_str = self._cur.mogrify(idx_params, idx_list).decode("utf-8")

            update_strings.append(f"UPDATE {table} SET {upd_str} WHERE {idx_str};")

        full_update_string = "\n".join(update_strings)
        print(full_update_string)  # Debugging
        self._cur.execute(full_update_string)
        rowcount = self._cur.rowcount
        if commit:
            self.commit()
        return rowcount
    except Exception:
        self.rollback()
        # Bare raise keeps the original traceback intact.
        raise
Example usages:
>>> df = pd.DataFrame([
{'a':1,'b':'asdf','c':datetime.datetime.now()},
{'a':2,'b':'jklm','c':datetime.datetime.now()}
])
>>> cls.update_data('my_table', df, indexes = ['a'])
UPDATE my_table SET b = 'asdf', c = '2023-01-17T22:13:37.095245'::timestamp WHERE a = 1;
UPDATE my_table SET b = 'jklm', c = '2023-01-17T22:13:37.095250'::timestamp WHERE a = 2;
>>> cls.update_data('my_table', df, indexes = ['a','b'])
UPDATE my_table SET c = '2023-01-17T22:13:37.095245'::timestamp WHERE a = 1 AND b = 'asdf';
UPDATE my_table SET c = '2023-01-17T22:13:37.095250'::timestamp WHERE a = 2 AND b = 'jklm';
>>> cls.update_data('my_table', df.set_index('a'), column_map={'a':'db_a','b':'db_b','c':'db_c'} )
UPDATE my_table SET db_b = 'asdf', db_c = '2023-01-17T22:13:37.095245'::timestamp WHERE db_a = 1;
UPDATE my_table SET db_b = 'jklm', db_c = '2023-01-17T22:13:37.095250'::timestamp WHERE db_a = 2;
Note however that this is not safe from SQL injection due to the way it generates the where clause.
I have a Python script that processes several files of a few gigabytes each. With the code shown below, I store some data in a list, which is in turn stored in the dictionary snp_dict. The RAM consumption is huge. Looking at my code, could you suggest some ways to reduce RAM consumption, if any?
def extractAF(files_vcf):
    """Collect the alternate-allele frequency of every SNP across VCF files.

    Args:
        files_vcf: collection of VCF file names; they are processed in sorted
            order, and that order defines each file's slot in the result lists.

    Returns:
        dict mapping "<CHROM>_<POS>" to a list with one entry per file: the
        frequency rounded to 3 decimals where the SNP occurs in that file,
        and 0 otherwise.
    """
    snp_dict = dict()
    n_files = len(files_vcf)
    for z, infile_name in enumerate(sorted(files_vcf)):
        print(' * ' + infile_name)
        ###single files
        # Context manager: the handle is closed before the next file is opened.
        with open(infile_name, 'r') as handle:
            for record in vcf.Reader(handle):
                snp_position = '_'.join([record.CHROM, str(record.POS)])
                # The first four DP4 values are read as ref fwd, ref rev, alt fwd, alt rev.
                ref_F, ref_R, alt_F, alt_R = (
                    float(depth) for depth in record.INFO['DP4'][:4]
                )
                AF = (alt_F + alt_R) / (alt_F + alt_R + ref_F + ref_R)
                if snp_position not in snp_dict:
                    snp_dict[snp_position] = [0] * n_files
                snp_dict[snp_position][z] = round(AF, 3)  # record.INFO['DP4']
    return snp_dict
I finally adopted the following implementation with MySQL:
for infile_name in sorted(files_vcf):
print infile_name
###single files
vcf_reader = vcf.Reader(open(infile_name, 'r'))
for record in vcf_reader:
snp_position='_'.join([record.CHROM, str(record.POS)])
ref_F = float(record.INFO['DP4'][0])
ref_R = float(record.INFO['DP4'][1])
alt_F = float(record.INFO['DP4'][2])
alt_R = float(record.INFO['DP4'][3])
AF = (alt_F+alt_R)/(alt_F+alt_R+ref_F+ref_R)
if not snp_position in snp_dict:
sql_insert_table = "INSERT INTO snps VALUES ('" + snp_position + "'," + ",".join(list(('0') for _ in range(len(files_vcf)))) + ")"
cursor = db1.cursor()
cursor.execute(sql_insert_table)
db1.commit()
snp_dict.append(snp_position)
sql_update = "UPDATE snps SET " + str(z) + "g=" + str(AF) + " WHERE snp_pos='" + snp_position + "'";
cursor = db1.cursor()
cursor.execute(sql_update)
db1.commit()
z+=1
return snp_dict
For this sort of thing, you are probably better off using another data structure. A pandas DataFrame would work well in your situation.
The simplest solution would be to use an existing library, rather than writing your own parser. vcfnp can read vcf files into a format that is easily convertible to a pandas DataFrame. Something like this should work:
import pandas as pd
def extractAF(files_vcf):
    """Build a (file x SNP position) table of alternate-allele frequencies.

    Args:
        files_vcf: collection of VCF file names, processed in sorted order.

    Returns:
        pandas.DataFrame with one row per file and one column per
        "<CHROM>_<POS>"; positions absent from a file are filled with 0.0.
        An empty DataFrame is returned when no files are given.
    """
    dfs = []
    for fname in sorted(files_vcf):
        variants = vcfnp.variants(fname, fields=['CHROM', 'POS', 'DP4'])
        snp_pos = np.char.add(np.char.add(variants.CHROM, '_'), variants.POS.astype('S'))
        dp4 = variants.DP4.astype('float')
        # assumes dp4 is (n_variants, 4) ordered ref_F, ref_R, alt_F, alt_R -
        # TODO confirm against vcfnp; the frequency is computed per variant.
        AF = dp4[:, 2:].sum(axis=1) / dp4.sum(axis=1)
        dfs.append(pd.DataFrame(AF, index=snp_pos, columns=[fname]).T)
    if not dfs:
        # pd.concat raises on an empty list; no files means an empty table.
        return pd.DataFrame()
    return pd.concat(dfs).fillna(0.0)
If you absolutely must use PyVCF, it will be slower, but hopefully this will at least be faster than your existing implementation, and should produce the same result as the above code:
def extractAF(files_vcf):
    """Build a (file x SNP position) table of alternate-allele frequencies with PyVCF.

    Args:
        files_vcf: collection of VCF file names, processed in sorted order.

    Returns:
        pandas.DataFrame with one row per file and one column per
        "<CHROM>_<POS>"; positions absent from a file are filled with 0.0.
        An empty DataFrame is returned when no files are given.
    """
    files_vcf = sorted(files_vcf)
    dfs = []
    for fname in files_vcf:
        print(' * ' + fname)
        # Materialise the rows while the file is open, then close it.
        with open(fname, 'r') as handle:
            rows = [
                (rec.CHROM, rec.POS) + tuple(rec.INFO['DP4'])
                for rec in vcf.Reader(handle)
            ]
        df = pd.DataFrame(rows, columns=['CHROMS', 'POS', 'ref_F', 'ref_R', 'alt_F', 'alt_R'])
        # str (not 'S'): a bytes column cannot be concatenated with text on Python 3.
        df['snp_position'] = df['CHROMS'] + '_' + df['POS'].astype(str)
        alt_depth = df[['alt_F', 'alt_R']].sum(axis=1)
        total_depth = df[['alt_F', 'alt_R', 'ref_F', 'ref_R']].sum(axis=1)
        df[fname] = alt_depth / total_depth
        # Keep only the frequency column, keyed by position, as a single row.
        dfs.append(df.set_index('snp_position', drop=True)[[fname]].T)
    if not dfs:
        # pd.concat raises on an empty list; no files means an empty table.
        return pd.DataFrame()
    return pd.concat(dfs).fillna(0.0)
Now let's say you want to read a particular snp_position, held in a variable snp_pos, that may or may not be present (from your comment). You wouldn't actually have to change anything:
all_vcf = extractAF(files_vcf)
if snp_pos in all_vcf:
linea_di_AF = all_vcf[snp_pos]
The result will be slightly different, though. It will be a pandas Series, which is like an array but can also be accessed like a dictionary:
all_vcf = extractAF(files_vcf)
if snp_pos in all_vcf:
linea_di_AF = all_vcf[snp_pos]
f_di_AF = linea_di_AF[files_vcf[0]]
This allows you to access a particular file/snp_pos pair directly:
all_vcf = extractAF(files_vcf)
if snp_pos in all_vcf:
    # Index the table itself: column snp_pos first, then the file's row label.
    f_di_AF = all_vcf[snp_pos][files_vcf[0]]
Or, better yet:
all_vcf = extractAF(files_vcf)
if snp_pos in all_vcf:
    # .loc takes (row label, column label): the file name, then the position.
    f_di_AF = all_vcf.loc[files_vcf[0], snp_pos]
Or you can get all snp_pos values for a given file:
all_vcf = extractAF(files_vcf)
# One row of the table: every snp_pos value recorded for the file fname.
fpos = all_vcf.loc[fname]