BigQuery: how to change the mode of columns? - Python

I have a Dataflow pipeline that fetches data from Pub/Sub, prepares it for insertion into BigQuery, and then writes it to the database.
It works fine: it generates the schema automatically and recognises which datatype to use.
However, the data we feed it can vary vastly in format. For example, a single column can receive both A and B:
A {"name":"John"}
B {"name":["Albert", "Einstein"]}
If the first message arrives first it is inserted, but the second one then fails to load; the other way around works.
I always get the following error:
INFO:root:Error: 400 POST https://bigquery.googleapis.com/upload/bigquery/v2/project/projectname/jobs?uploadType=resumable: Provided Schema does not match Table project:test_dataset.test_table. Field cars has changed mode from NULLABLE to REPEATED with loading dataframe
ERROR:apache_beam.runners.direct.executor:Exception at bundle <apache_beam.runners.direct.bundle_factory._Bundle object at 0x7fcb9003f2c0>, due to an exception.
Traceback (most recent call last):
........
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
.....
Provided Schema does not match Table project.test_table. Field cars has changed mode from NULLABLE to REPEATED
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "apache_beam/runners/common.py", line 1233, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 582, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "newmain.py", line 211, in process
if load_job and load_job.errors:
UnboundLocalError: local variable 'load_job' referenced before assignment
Below is the code
class WriteDataframeToBQ(beam.DoFn):
def __init__(self, bq_dataset, bq_table, project_id):
self.bq_dataset = bq_dataset
self.bq_table = bq_table
self.project_id = project_id
def start_bundle(self):
self.client = bigquery.Client()
def process(self, df):
# table where we're going to store the data
table_id = f"{self.bq_dataset}.{self.bq_table}"
# function to help with the json -> bq schema transformations
generator = SchemaGenerator(input_format='dict', quoted_values_are_strings=True, keep_nulls=True)
# Get original schema to assist the deduce_schema function. If the table doesn't exist
# proceed with empty original_schema_map
try:
table = self.client.get_table(table_id)
original_schema = table.schema
self.client.schema_to_json(original_schema, "original_schema.json")
with open("original_schema.json") as f:
original_schema = json.load(f)
original_schema_map, original_schema_error_logs = generator.deduce_schema(input_data=original_schema)
except Exception:
logging.info(f"{table_id} table not exists. Proceed without getting schema")
original_schema_map = {}
# convert dataframe to dict
json_text = df.to_dict('records')
# generate the new schema, we need to write it to a file because schema_from_json only accepts json file as input
schema_map, error_logs = generator.deduce_schema(input_data=json_text, schema_map=original_schema_map)
schema = generator.flatten_schema(schema_map)
schema_file_name = "schema_map.json"
with open(schema_file_name, "w") as output_file:
json.dump(schema, output_file)
# convert the generated schema to a version that BQ understands
bq_schema = self.client.schema_from_json(schema_file_name)
job_config = bigquery.LoadJobConfig(
source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
schema_update_options=[
bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION,
bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION
],
write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
schema=bq_schema
)
job_config.schema = bq_schema
try:
load_job = self.client.load_table_from_json(
json_text,
table_id,
job_config=job_config,
) # Make an API request.
load_job.result() # Waits for the job to complete.
if load_job.errors:
logging.info(f"error_result = {load_job.error_result}")
logging.info(f"errors = {load_job.errors}")
else:
logging.info(f'Loaded {len(df)} rows.')
except Exception as error:
logging.info(f'Error: {error} with loading dataframe')
if load_job and load_job.errors:
logging.info(f"error_result = {load_job.error_result}")
logging.info(f"errors = {load_job.errors}")
def run(argv):
parser = argparse.ArgumentParser()
known_args, pipeline_args = parser.parse_known_args(argv)
pipeline_options = PipelineOptions(pipeline_args, save_main_session=True, streaming=True)
options = pipeline_options.view_as(JobOptions)
with beam.Pipeline(options=pipeline_options) as pipeline:
(
pipeline
| "Read PubSub Messages" >> beam.io.ReadFromPubSub(subscription=options.input_subscription)
| "Write Raw Data to Big Query" >> beam.ParDo(WriteDataframeToBQ(project_id=options.project_id, bq_dataset=options.bigquery_dataset, bq_table=options.bigquery_table))
)
if __name__ == "__main__":
logging.getLogger().setLevel(logging.INFO)
run(sys.argv)
Is there a way to change the restrictions of the table to make this work?

BigQuery isn't a document database but a column-oriented database. In addition, you can't change the type or mode of an existing column (you can only add or remove columns).
For your use case, and because you can't know or predict the most generic schema for each of your fields, the safer option is to store the raw JSON as a string and then use BigQuery's JSON functions to post-process your data in SQL.
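A minimal sketch of that approach, assuming a table with a single raw_json STRING column (the table name, the raw_json field and the messages variable standing in for the decoded Pub/Sub payloads are all illustrative, not part of the original pipeline):
import json
from google.cloud import bigquery
client = bigquery.Client()
table_id = "project.test_dataset.test_table_raw"  # assumed table with one STRING column
# Keep the schema fixed: a single STRING column holding the raw message.
job_config = bigquery.LoadJobConfig(
    schema=[bigquery.SchemaField("raw_json", "STRING")],
    write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
)
rows = [{"raw_json": json.dumps(message)} for message in messages]
client.load_table_from_json(rows, table_id, job_config=job_config).result()
# Post-process in SQL with BigQuery's JSON functions; this copes with both the
# scalar and the array form of "name" without any schema change.
query = """
SELECT
  JSON_EXTRACT_SCALAR(raw_json, '$.name') AS single_name,
  JSON_EXTRACT_ARRAY(raw_json, '$.name') AS name_array
FROM `project.test_dataset.test_table_raw`
"""
for row in client.query(query).result():
    print(row.single_name, row.name_array)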

Related

AWS Glue create_partition using boto3 successful, but Athena not showing results for query

I have a Glue script to create new partitions using create_partition(). The Glue script runs successfully, and I can see the partitions in the Athena console when using SHOW PARTITIONS. For the Glue script I referred to this sample code: https://medium.com/#bv_subhash/demystifying-the-ways-of-creating-partitions-in-glue-catalog-on-partitioned-s3-data-for-faster-e25671e65574
When I run an Athena query against one of the newly added partitions, I get no results.
Do I need to trigger the MSCK command even if I add the partitions using create_partition? I appreciate any suggestions.
I found the solution myself and wanted to share it with the SO community so it might be useful to someone. The following code, when run as a Glue job, creates the partitions, and the new partition columns can then be queried in Athena. Change the parameter values (database name, table name, partition columns) as needed.
import boto3
import urllib.parse
import os
import copy
import sys
# Configure database / table name and emp_id, file_id from workflow params?
DATABASE_NAME = 'my_db'
TABLE_NAME = 'enter_table_name'
emp_id_tmp = ''
file_id_tmp = ''
# # Initialise the Glue client using Boto 3
glue_client = boto3.client('glue')
#get current table schema for the given database name & table name
def get_current_schema(database_name, table_name):
try:
response = glue_client.get_table(
DatabaseName=database_name,
Name=table_name
)
except Exception as error:
print("Exception while fetching table info")
sys.exit(-1)
# Parsing table info required to create partitions from table
table_data = {}
table_data['input_format'] = response['Table']['StorageDescriptor']['InputFormat']
table_data['output_format'] = response['Table']['StorageDescriptor']['OutputFormat']
table_data['table_location'] = response['Table']['StorageDescriptor']['Location']
table_data['serde_info'] = response['Table']['StorageDescriptor']['SerdeInfo']
table_data['partition_keys'] = response['Table']['PartitionKeys']
return table_data
#prepare partition input list using table_data
def generate_partition_input_list(table_data):
input_list = [] # Initializing empty list
part_location = "{}/emp_id={}/file_id={}/".format(table_data['table_location'], emp_id_tmp, file_id_tmp)
input_dict = {
'Values': [
emp_id_tmp, file_id_tmp
],
'StorageDescriptor': {
'Location': part_location,
'InputFormat': table_data['input_format'],
'OutputFormat': table_data['output_format'],
'SerdeInfo': table_data['serde_info']
}
}
input_list.append(input_dict.copy())
return input_list
#create partition dynamically using the partition input list
table_data = get_current_schema(DATABASE_NAME, TABLE_NAME)
input_list = generate_partition_input_list(table_data)
try:
create_partition_response = glue_client.batch_create_partition(
DatabaseName=DATABASE_NAME,
TableName=TABLE_NAME,
PartitionInputList=input_list
)
print('Glue partition created successfully.')
print(create_partition_response)
except Exception as e:
# Handle exception as per your business requirements
print(e)
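To verify that Athena sees the new partition without running MSCK REPAIR TABLE, you can query it directly; a hedged sketch (the partition values and the S3 output location are assumptions, the database and table names come from the script above):
import boto3
athena_client = boto3.client('athena')
# Query the partition that was just added with batch_create_partition.
query = (
    "SELECT * FROM my_db.enter_table_name "
    "WHERE emp_id = 'some_emp_id' AND file_id = 'some_file_id' LIMIT 10"
)
response = athena_client.start_query_execution(
    QueryString=query,
    QueryExecutionContext={'Database': 'my_db'},
    ResultConfiguration={'OutputLocation': 's3://your-athena-query-results-bucket/'}
)
print(response['QueryExecutionId'])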

Issue while creating a job_config parameter dynamically in bigquery

I am trying to load data via bq_client.load_table_from_uri, but I am getting the error below and I am not sure why.
I am expecting job_config to be as below:
job_config = bigquery.LoadJobConfig(
schema=[bigquery.SchemaField("COL1","STRING"),
bigquery.SchemaField("COL2","STRING"),
bigquery.SchemaField("COL3","INTEGER"),
],
skip_leading_rows=1,
source_format=bigquery.SourceFormat.CSV,
field_delimiter='|',
max_bad_records=1
)
When I hardcode job_config it works fine, but when I try to generate it dynamically it gives me an error.
variable1 and variable2 are static; the bigquery_final_temp variable is generated dynamically by some other process.
Note: when job_config is hardcoded its datatype is LoadJobConfig, but when it is constructed dynamically its datatype is str. How can I convert this str to a LoadJobConfig?
Below are the file data, code and error.
file data :
header|1234|2020-10-10
abc|xyz|12345
aws|gcp|1234
code:
uri = "gs://abc/sftp/test.csv"
var_delimitor = '|'
variable1 = 'bigquery.LoadJobConfig( schema=['
variable2 = '],skip_leading_rows=1,source_format=bigquery.SourceFormat.CSV,field_delimiter='+var_delimitor+',max_bad_records=1 )'
bigquery_final_temp = 'bigquery.SchemaField("COL1","STRING"),bigquery.SchemaField("COL2","STRING"),bigquery.SchemaField("COL3","INTEGER"),'
job_config_2=variable1 + bigquery_final_temp
job_config_final=job_config_2 + variable2
###########load data
load_job = bq_client.load_table_from_uri(
uri,Table_name, job_config=job_config_final
) # Make an API request.
print('Lots of content here 5')
load_job.result() # Wait for the job to complete.
print('Lots of content here 6')
table = bq_client.get_table(Table_name)
print("Loaded {} rows to table {}".format(table.num_rows, Table_name))
error:
TypeError: Expected an instance of LoadJobConfig class for the job_config parameter,
but received job_config = bigquery.LoadJobConfig( schema=[bigquery.SchemaField("COL1","STRING"),bigquery.SchemaField("COL2","STRING"),
bigquery.SchemaField("COL3","INTEGER"),],skip_leading_rows=1,source_format=bigquery.SourceFormat.CSV,field_delimiter='|',max_bad_records=1)
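load_table_from_uri needs an actual LoadJobConfig object; Python source code in a string is never evaluated. A minimal sketch of building the object dynamically instead, assuming the other process can hand you the column names and types as data rather than as code (the columns list below is a stand-in for that output; uri, Table_name and bq_client are the ones from the question):
from google.cloud import bigquery
# Assumed output of the dynamic process: (name, type) pairs instead of a code string.
columns = [("COL1", "STRING"), ("COL2", "STRING"), ("COL3", "INTEGER")]
job_config = bigquery.LoadJobConfig(
    schema=[bigquery.SchemaField(name, field_type) for name, field_type in columns],
    skip_leading_rows=1,
    source_format=bigquery.SourceFormat.CSV,
    field_delimiter='|',
    max_bad_records=1,
)
load_job = bq_client.load_table_from_uri(uri, Table_name, job_config=job_config)
load_job.result()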

updating specific records in Firebase Firestore

I'm working on adding keywords to a collection of documents (company information records).
The keyword generation works perfectly but now I'm struggling to figure out why my update isn't working.
with open (file) as csv_file:
csv_reader=csv.DictReader(csv_file,delimiter=',')
line_count=0
for row in csv_reader:
uuid=row['uuid']
name=row['name'].lower()
keywords = ""
arrName = []
for c in name:
keywords += c
arrName.append(keywords)
print(uuid, arrName)
data = {
u'keywords': arrName}
doc_ref = store.collection("org_test").where("uuid", "==", uuid).update(data)
I know it's the last line but I get the same error if I use .update or .set.
uuid is: e1393508-30ea-8a36-3f96-dd3226033abd keywords are: ['w', 'we', 'wet', 'wetp', 'wetpa', 'wetpai', 'wetpain', 'wetpaint']
Traceback (most recent call last):
File "keywords.py", line 107, in <module>
doc_ref = store.collection("org_test").where("uuid", "==", uuid).update(data)
AttributeError: 'Query' object has no attribute 'update'
I've also tried:
doc_ref = store.collection("org_test").where("uuid", "==", uuid).document.update(data)
But I get a similar error.
You are getting this error because you cannot fire an update call directly from a query.
What you are doing in this part of your code:
store.collection("org_test").where("uuid", "==", uuid)
returns a Query object, which is basically the description of a query, and that object does not have an update() method.
To update, you need a DocumentReference, which you get with document() and which does have update(), so you can do this in your code:
store.collection("org_test").document(uuid).update(data)
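Note that document(uuid) only works if the Firestore document ID is the uuid itself. If uuid is just a field on the document, as in the question, a sketch of running the query and updating each matching document through its reference (store, uuid and data are the variables from the question):
docs = store.collection("org_test").where("uuid", "==", uuid).stream()
for doc in docs:
    # doc is a DocumentSnapshot; its .reference is the DocumentReference that has update()
    doc.reference.update(data)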
I realized that I had data with different data types. Most of the fields are strings, but one is an array.
This was my code and solution. It takes a file name as a command-line argument, parses the fields, creates an array of keywords based on the company name, and finally creates a new collection in my Firestore.
import csv
import argparse
import firebase_admin
import google.cloud
from google.cloud.firestore_v1 import ArrayUnion
from firebase_admin import credentials, firestore
def init_argparse() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description='Parse through the fields, create an array of keywords based on the company name and upload to Firestore.', add_help=False, usage=get_usage())
parser.add_argument('file', help='File to run against.')
parser.add_argument('-q', '--quiet', help='Quiet mode', action='store_true')
parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0.0', help='Version.')
return parser
# Launch program
if __name__ == '__main__':
parser = init_argparse()
args = parser.parse_args()
arg_quiet = args.quiet
file = args.file
cred = credentials.Certificate("./serviceAccountKey.json")
app = firebase_admin.initialize_app(cred)
db = firestore.client()
with open (file) as csv_file:
csv_reader=csv.DictReader(csv_file,delimiter=',')
line_count=0
co = []
for row in csv_reader:
# Initialize variables
uuid=row["uuid"]
name=row["name"].lower()
primary_role=row["primary_role"]
cb_url=row["cb_url"]
domain=row["domain"]
homepage_url=row["homepage_url"]
logo_url=row["logo_url"]
facebook_url=row["facebook_url"]
twitter_url=row["twitter_url"]
linkedin_url=row["linkedin_url"]
combined_stock_symbols=row["combined_stock_symbols"]
city=row["city"]
region=row["region"]
country_code=row["country_code"]
short_description=row["short_description"]
keywords = ""
arrName = []
clean = []
# Loop to create searchable keywords
for c in name:
keywords += c
arrName.append(keywords)
doc_ref = db.collection("orgs").document()
doc_ref.set({
u'uuid': uuid,
u'name': name,
u'keywords': ArrayUnion(arrName),
u'primary_role':primary_role,
u'cb_url': cb_url,
u'domain': domain,
u'homepage_url': homepage_url,
u'logo_url': logo_url,
u'facebook_url': facebook_url,
u'twitter_url': twitter_url,
u'linkedin_url': linkedin_url,
u'combined_stock_symbols': combined_stock_symbols,
u'city': city,
u'region': region,
u'country_code': country_code,
u'short_description': short_description
})

Add a date loaded field when uploading csv to big query

Using Python.
Is there any way to add an extra field while loading a CSV file into BigQuery?
I'd like to add a date_loaded field with the current date.
The Google code example I have used:
# from google.cloud import bigquery
# client = bigquery.Client()
# dataset_id = 'my_dataset'
dataset_ref = client.dataset(dataset_id)
job_config = bigquery.LoadJobConfig()
job_config.schema = [
bigquery.SchemaField('name', 'STRING'),
bigquery.SchemaField('post_abbr', 'STRING')
]
job_config.skip_leading_rows = 1
# The source format defaults to CSV, so the line below is optional.
job_config.source_format = bigquery.SourceFormat.CSV
uri = 'gs://cloud-samples-data/bigquery/us-states/us-states.csv'
load_job = client.load_table_from_uri(
uri,
dataset_ref.table('us_states'),
job_config=job_config) # API request
print('Starting job {}'.format(load_job.job_id))
load_job.result() # Waits for table load to complete.
print('Job finished.')
destination_table = client.get_table(dataset_ref.table('us_states'))
print('Loaded {} rows.'.format(destination_table.num_rows))
Modifying this Python example to fit your case: open and read the original CSV file from your local machine, add the new column, and append a timestamp to the end of each line so the column is not empty (Python's datetime module lets you format the timestamp as needed).
Then write the resulting data to an output file and copy it to Google Storage, for example by running gsutil from Python via the subprocess module.
I hope this helps.
#Import the dependencies
import csv,datetime,subprocess
from google.cloud import bigquery
#Replace the values for variables with the appropriate ones
#Name of the input csv file
csv_in_name = 'us-states.csv'
#Name of the output csv file to avoid messing up the original
csv_out_name = 'out_file_us-states.csv'
#Name of the NEW COLUMN NAME to be added
new_col_name = 'date_loaded'
#Type of the new column
col_type = 'DATETIME'
#Name of your bucket
bucket_id = 'YOUR BUCKET ID'
#Your dataset name
ds_id = 'YOUR DATASET ID'
#The destination table name
destination_table_name = 'TABLE NAME'
# read and write csv files
with open(csv_in_name,'r') as r_csvfile:
with open(csv_out_name,'w') as w_csvfile:
dict_reader = csv.DictReader(r_csvfile,delimiter=',')
#add new column with existing
fieldnames = dict_reader.fieldnames + [new_col_name]
writer_csv = csv.DictWriter(w_csvfile,fieldnames,delimiter=',')
writer_csv.writeheader()
for row in dict_reader:
#Put the timestamp after the last comma so that the column is not empty
row[new_col_name] = datetime.datetime.now()
writer_csv.writerow(row)
#Copy the file to your Google Storage bucket
subprocess.call('gsutil cp ' + csv_out_name + ' gs://' + bucket_id , shell=True)
client = bigquery.Client()
dataset_ref = client.dataset(ds_id)
job_config = bigquery.LoadJobConfig()
#Add a new column to the schema!
job_config.schema = [
bigquery.SchemaField('name', 'STRING'),
bigquery.SchemaField('post_abbr', 'STRING'),
bigquery.SchemaField(new_col_name, col_type)
]
job_config.skip_leading_rows = 1
# The source format defaults to CSV, so the line below is optional.
job_config.source_format = bigquery.SourceFormat.CSV
#Address string of the output csv file
uri = 'gs://' + bucket_id + '/' + csv_out_name
load_job = client.load_table_from_uri(uri,dataset_ref.table(destination_table_name),job_config=job_config) # API request
print('Starting job {}'.format(load_job.job_id))
load_job.result() # Waits for table load to complete.
print('Job finished.')
destination_table = client.get_table(dataset_ref.table(destination_table_name))
print('Loaded {} rows.'.format(destination_table.num_rows))
You can keep loading your data as you are loading, but into a table called old_table.
Once loaded, you can run something like:
bq --location=US query --destination_table mydataset.newtable --use_legacy_sql=false --replace=true 'select *, current_date() as date_loaded from mydataset.old_table'
This basically copies the content of old_table, with a new date_loaded column appended at the end, into newtable. This way you get the new column without downloading anything locally or any of the mess.
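If you prefer to stay in Python instead of the bq CLI, a roughly equivalent sketch (the project, dataset and table names are the same placeholders as in the command above):
from google.cloud import bigquery
client = bigquery.Client()
job_config = bigquery.QueryJobConfig(
    destination="myproject.mydataset.newtable",
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,  # same effect as --replace=true
)
sql = "SELECT *, CURRENT_DATE() AS date_loaded FROM `mydataset.old_table`"
client.query(sql, job_config=job_config).result()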

Import JSON to NEO4J using py2neo

I am trying to import a JSON file, obtained through an API request to Stack Overflow, into Neo4j. I have been following this tutorial. However, I get errors like the following while trying to execute the query:
File "/Users/ahmedov/anaconda/lib/python2.7/site-packages/py2neo/cypher/core.py", line 306, in commit
return self.post(self.__commit or self.__begin_commit)
File "/Users/ahmedov/anaconda/lib/python2.7/site-packages/py2neo/cypher/core.py", line 261, in post
raise self.error_class.hydrate(error)
File "/Users/ahmedov/anaconda/lib/python2.7/site-packages/py2neo/cypher/error/core.py", line 54, in hydrate
error_cls = getattr(error_module, title)
AttributeError: 'module' object has no attribute 'SyntaxError'
I am using the following code:
import os
import requests
from py2neo import neo4j
from py2neo import Graph
from py2neo import Path, authenticate
# set up authentication parameters
authenticate("localhost:7474", "neo4j", "neo4j")
# connect to authenticated graph database
#graph = Graph("http://localhost:7474/db/data/")
# Connect to graph and add constraints.
neo4jUrl = os.environ.get('NEO4J_URL',"http://localhost:7474/db/data/")
graph = neo4j.Graph(neo4jUrl)
# Connect to graph and add constraints.
#neo4jUrl = os.environ.get('NEO4J_URL',"http://localhost:7474/db/data/")
#graph = neo4j.GraphDatabaseService(neo4jUrl)
# Add uniqueness constraints.
graph.cypher.execute("CREATE CONSTRAINT ON (q:Question) ASSERT q.id IS UNIQUE;")
# Build URL.
apiUrl ="https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf"
# Send GET request.
json = requests.get(apiUrl, headers = {"accept":"application/json"}).json()
# Build query.
query = """
UNWIND data.items as q
MERGE (question:Question {id:q.question_id}) ON CREATE
SET question.title = q.title, question.share_link = q.share_link, question.favorite_count = q.favorite_count
MERGE (owner:User {id:q.owner.user_id}) ON CREATE SET owner.display_name = q.owner.display_name
MERGE (owner)-[:ASKED]->(question)
FOREACH (tagName IN q.tags | MERGE (tag:Tag {name:tagName}) MERGE (question)-[:TAGGED]->(tag))
FOREACH (a IN q.answers |
MERGE (question)<-[:ANSWERS]-(answer:Answer {id:a.answer_id})
MERGE (answerer:User {id:a.owner.user_id}) ON CREATE SET answerer.display_name = a.owner.display_name
MERGE (answer)<-[:PROVIDED]-(answerer)
)
"""
statement = "MERGE (n:Person {name:{N}}) RETURN n"
results = graph.cypher.run(query,json=json)
tx = graph.cypher.begin()
def add_names(*names):
for name in names:
tx.append(statement, {"N": name})
tx.process()
add_names("Homer", "Marge", "Bart", "Lisa", "Maggie")
add_names("Peter", "Lois", "Chris", "Meg", "Stewie")
tx.append(query,)
tx.commit()
# Send Cypher query.
The problem originates from the following line:
results = graph.cypher.run(query,json=json)
I had to change the above line to adjust it to the newer py2neo API. The original line looked like this:
neo4j.CypherQuery(graph, query).run(json=json)
So basically, I need a way to tell Neo4j to process the JSON data using the given query. I tried reading the documentation and searching the web with no luck. Any help would be appreciated.
A couple of things to make your script work:
from py2neo import neo4j is no longer a valid import.
In your query you pass a JSON map as a parameter but you don't use the parameter syntax in the query, so I added WITH {json} as data at the beginning of the query.
Added secure=False for the connection.
The last tx.append(query,) is not needed.
Working script:
import os
import requests
#from py2neo import neo4j
from py2neo import Graph
from py2neo import Path, authenticate
# set up authentication parameters
authenticate("localhost:7474", "neo4j", "neo4j")
# connect to authenticated graph database
#graph = Graph("http://localhost:7474/db/data/")
# Connect to graph and add constraints.
neo4jUrl = os.environ.get('NEO4J_URL',"http://localhost:7474/db/data/")
graph = Graph(neo4jUrl,secure=False)
# Connect to graph and add constraints.
#neo4jUrl = os.environ.get('NEO4J_URL',"http://localhost:7474/db/data/")
#graph = neo4j.GraphDatabaseService(neo4jUrl)
# Add uniqueness constraints.
graph.run("CREATE CONSTRAINT ON (q:Question) ASSERT q.id IS UNIQUE;")
# Build URL.
apiUrl ="https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf"
# Send GET request.
json = requests.get(apiUrl, headers = {"accept":"application/json"}).json()
#print(json);
# Build query.
query = """
WITH {json} as data
UNWIND data.items as q
MERGE (question:Question {id:q.question_id}) ON CREATE
SET question.title = q.title, question.share_link = q.share_link, question.favorite_count = q.favorite_count
MERGE (owner:User {id:q.owner.user_id}) ON CREATE SET owner.display_name = q.owner.display_name
MERGE (owner)-[:ASKED]->(question)
FOREACH (tagName IN q.tags | MERGE (tag:Tag {name:tagName}) MERGE (question)-[:TAGGED]->(tag))
FOREACH (a IN q.answers |
MERGE (question)<-[:ANSWERS]-(answer:Answer {id:a.answer_id})
MERGE (answerer:User {id:a.owner.user_id}) ON CREATE SET answerer.display_name = a.owner.display_name
MERGE (answer)<-[:PROVIDED]-(answerer)
)
"""
statement = "MERGE (n:Person {name:{N}}) RETURN n"
results = graph.run(query,json=json)
tx = graph.begin()
def add_names(*names):
for name in names:
tx.append(statement, {"N": name})
tx.process()
add_names("Homer", "Marge", "Bart", "Lisa", "Maggie")
add_names("Peter", "Lois", "Chris", "Meg", "Stewie")
#tx.append(query,)
tx.commit()
With a recent version of py2neo the syntax changes a little bit: rather than {json}, it is necessary to use $json, as shown below:
from py2neo import Graph
graph = Graph(NEO4J_URI, user=NEO4J_USER, password=NEO4j_PASSWORD)
json = ['22644198319', '15383242341']
result = graph.run(
"""WITH $json as data
UNWIND data as id
MATCH (c:Person {id: id}) RETURN c.name""",
json=json
)
data_frame_result = result.to_data_frame()
print(data_frame_result)
Making these changes to @christophe-willemsen's example, the code runs on the newest versions.
