How to propagate mlpipeline-metrics from custom Python function TFX component?

Note: this is a copy of a GitHub issue I reported.
It is re-posted in the hope of getting more attention; I will update any solutions on either site.
Question
I want to export mlpipeline-metrics from my custom Python function TFX component so that it is displayed in the KubeFlow UI.
This is a minimal example of what I am trying to do:
import json

from tfx.dsl.component.experimental.annotations import OutputArtifact
from tfx.dsl.component.experimental.decorators import component
from tfx.types.standard_artifacts import Artifact

class Metric(Artifact):
    TYPE_NAME = 'Metric'

@component
def ShowMetric(MLPipeline_Metrics: OutputArtifact[Metric]):
    rmse_eval = 333.33
    metrics = {
        'metrics': [
            {
                'name': 'RMSE-validation',
                'numberValue': rmse_eval,
                'format': 'RAW'
            }
        ]
    }
    path = '/tmp/mlpipeline-metrics.json'
    with open(path, 'w') as _file:
        json.dump(metrics, _file)
    MLPipeline_Metrics.uri = path
In the KubeFlow UI, the "Run output" tab says "No metrics found for this run." However, the output artefact shows up in the ML MetaData (see screenshot). Any help on how to accomplish this would be greatly appreciated. Thanks!
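In case it helps others with the same setup, here is a minimal sketch of one direction to try (not a confirmed fix): instead of reassigning MLPipeline_Metrics.uri to a local /tmp path, write the metrics JSON into the location the orchestrator already assigned to the artifact, using tfx.dsl.io.fileio so that gs:// URIs also work. The file name mlpipeline-metrics.json is an assumption carried over from the question.
import json
import os

from tfx.dsl.component.experimental.annotations import OutputArtifact
from tfx.dsl.component.experimental.decorators import component
from tfx.dsl.io import fileio
from tfx.types.standard_artifacts import Artifact

class Metric(Artifact):
    TYPE_NAME = 'Metric'

@component
def ShowMetric(MLPipeline_Metrics: OutputArtifact[Metric]):
    metrics = {'metrics': [{'name': 'RMSE-validation',
                            'numberValue': 333.33,
                            'format': 'RAW'}]}
    # Write into the URI assigned by the orchestrator rather than
    # overwriting it with a local path that other pods cannot see.
    out_path = os.path.join(MLPipeline_Metrics.uri, 'mlpipeline-metrics.json')
    with fileio.open(out_path, 'w') as f:
        json.dump(metrics, f)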

Related

Reading Data in Vertex AI Pipelines

This is my first time using Google's Vertex AI Pipelines. I checked this codelab as well as this post and this post, on top of some links derived from the official documentation. I decided to put all that knowledge to work in a toy example: I was planning to build a pipeline consisting of 2 components: "get-data" (which reads some .csv file stored in Cloud Storage) and "report-data" (which basically returns the shape of the .csv data read in the previous component). Furthermore, I was careful to include some suggestions provided in this forum. The code I currently have goes as follows:
from kfp.v2 import compiler
from kfp.v2.dsl import pipeline, component, Dataset, Input, Output
from google.cloud import aiplatform

# Components section
@component(
    packages_to_install=[
        "google-cloud-storage",
        "pandas",
    ],
    base_image="python:3.9",
    output_component_file="get_data.yaml"
)
def get_data(
    bucket: str,
    url: str,
    dataset: Output[Dataset],
):
    import pandas as pd
    from google.cloud import storage
    storage_client = storage.Client("my-project")
    bucket = storage_client.get_bucket(bucket)
    blob = bucket.blob(url)
    blob.download_to_filename('localdf.csv')
    # path = "gs://my-bucket/program_grouping_data.zip"
    df = pd.read_csv('localdf.csv', compression='zip')
    df['new_skills'] = df['new_skills'].apply(ast.literal_eval)
    df.to_csv(dataset.path + ".csv", index=False, encoding='utf-8-sig')

@component(
    packages_to_install=["pandas"],
    base_image="python:3.9",
    output_component_file="report_data.yaml"
)
def report_data(
    inputd: Input[Dataset],
):
    import pandas as pd
    df = pd.read_csv(inputd.path)
    return df.shape

# Pipeline section
@pipeline(
    # Default pipeline root. You can override it when submitting the pipeline.
    pipeline_root=PIPELINE_ROOT,
    # A name for the pipeline.
    name="my-pipeline",
)
def my_pipeline(
    url: str = "test_vertex/pipeline_root/program_grouping_data.zip",
    bucket: str = "my-bucket"
):
    dataset_task = get_data(bucket, url)
    dimensions = report_data(
        dataset_task.output
    )

# Compilation section
compiler.Compiler().compile(
    pipeline_func=my_pipeline, package_path="pipeline_job.json"
)

# Running and submitting job
from datetime import datetime
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
run1 = aiplatform.PipelineJob(
    display_name="my-pipeline",
    template_path="pipeline_job.json",
    job_id="mlmd-pipeline-small-{0}".format(TIMESTAMP),
    parameter_values={"url": "test_vertex/pipeline_root/program_grouping_data.zip", "bucket": "my-bucket"},
    enable_caching=True,
)
run1.submit()
I was happy to see that the pipeline compiled with no errors and I managed to submit the job. However, "my happiness was short-lived": when I went to Vertex AI Pipelines, I stumbled upon an error, which goes like:
The DAG failed because some tasks failed. The failed tasks are: [get-data].; Job (project_id = my-project, job_id = 4290278978419163136) is failed due to the above error.; Failed to handle the job: {project_number = xxxxxxxx, job_id = 4290278978419163136}
I could not find any related info on the web, nor any log or anything similar, and I feel a bit overwhelmed that the solution to this (seemingly) easy example is still eluding me.
Quite obviously, I don't know what I am doing wrong or where. Any suggestions?
With some suggestions provided in the comments, I think I managed to make my demo pipeline work. I will first include the updated code:
from kfp.v2 import compiler
from kfp.v2.dsl import pipeline, component, Dataset, Input, Output
from datetime import datetime
from google.cloud import aiplatform
from typing import NamedTuple

# Importing 'COMPONENTS' of the 'PIPELINE'
@component(
    packages_to_install=[
        "google-cloud-storage",
        "pandas",
    ],
    base_image="python:3.9",
    output_component_file="get_data.yaml"
)
def get_data(
    bucket: str,
    url: str,
    dataset: Output[Dataset],
):
    """Reads a csv file, from some location in Cloud Storage"""
    import ast
    import pandas as pd
    from google.cloud import storage
    # 'Pulling' demo .csv data from a known location in GCS
    storage_client = storage.Client("my-project")
    bucket = storage_client.get_bucket(bucket)
    blob = bucket.blob(url)
    blob.download_to_filename('localdf.csv')
    # Reading the pulled demo .csv data
    df = pd.read_csv('localdf.csv', compression='zip')
    df['new_skills'] = df['new_skills'].apply(ast.literal_eval)
    df.to_csv(dataset.path + ".csv", index=False, encoding='utf-8-sig')

@component(
    packages_to_install=["pandas"],
    base_image="python:3.9",
    output_component_file="report_data.yaml"
)
def report_data(
    inputd: Input[Dataset],
) -> NamedTuple("output", [("rows", int), ("columns", int)]):
    """From a passed csv file existing in Cloud Storage, returns its dimensions"""
    import pandas as pd
    df = pd.read_csv(inputd.path + ".csv")
    return df.shape

# Building the 'PIPELINE'
@pipeline(
    # i.e. in my case: PIPELINE_ROOT = 'gs://my-bucket/test_vertex/pipeline_root/'
    # Can be overridden when submitting the pipeline
    pipeline_root=PIPELINE_ROOT,
    name="readcsv-pipeline",  # Your own naming for the pipeline.
)
def my_pipeline(
    url: str = "test_vertex/pipeline_root/program_grouping_data.zip",
    bucket: str = "my-bucket"
):
    dataset_task = get_data(bucket, url)
    dimensions = report_data(
        dataset_task.output
    )

# Compiling the 'PIPELINE'
compiler.Compiler().compile(
    pipeline_func=my_pipeline, package_path="pipeline_job.json"
)

# Running the 'PIPELINE'
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
run1 = aiplatform.PipelineJob(
    display_name="my-pipeline",
    template_path="pipeline_job.json",
    job_id="mlmd-pipeline-small-{0}".format(TIMESTAMP),
    parameter_values={
        "url": "test_vertex/pipeline_root/program_grouping_data.zip",
        "bucket": "my-bucket"
    },
    enable_caching=True,
)

# Submitting the 'PIPELINE'
run1.submit()
Now I will add some complementary comments, which, in sum, solved my problem:
First, having the "Logs Viewer" role (roles/logging.viewer) enabled for your user will greatly help in troubleshooting any existing error in your pipeline (note: that role worked for me, but you might want to look for a better-matching role for your own purposes here). Those errors will appear as "Logs", which can be accessed by clicking the corresponding button:
NOTE: In the picture above, when the "Logs" are displayed, it can be helpful to carefully check each log (close to the time when you created your pipeline), as generally each of them corresponds with a single warning or error line.
Second, the output of my pipeline was a tuple. In my original approach, I just returned the plain tuple, but it is advised to return a NamedTuple instead. In general, if you need to input / output one or more "small values" (int or str, for any reason), pick a NamedTuple to do so.
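As a compact illustration of that advice, here is a minimal sketch of a component whose small outputs travel as a NamedTuple (the component name and values here are hypothetical):
from typing import NamedTuple
from kfp.v2.dsl import component

@component(base_image="python:3.9")
def get_shape() -> NamedTuple("Outputs", [("rows", int), ("columns", int)]):
    # Small int/str outputs are best returned as a NamedTuple
    from collections import namedtuple
    outputs = namedtuple("Outputs", ["rows", "columns"])
    return outputs(rows=3, columns=4)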
Third, when the connection between your components is an Input[Dataset] or Output[Dataset], adding the file extension is needed (and quite easy to forget). Take for instance the output of the get_data component, and notice how the data is recorded by specifically adding the file extension, i.e. dataset.path + ".csv".
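In other words, the writer and the reader have to agree on the extension; schematically (lines taken from the components above):
# In get_data (writer): record the artifact with an explicit extension
df.to_csv(dataset.path + ".csv", index=False, encoding='utf-8-sig')
# In report_data (reader): append the same extension when loading
df = pd.read_csv(inputd.path + ".csv")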
Of course, this is a very tiny example, and projects can easily scale to something much larger; however, as some sort of "Hello, Vertex AI Pipelines" it works well.
Thank you.
Thanks for your writeup. Very helpful! I had the same error, but it turned out to be for a different reason, so noting it here...
In my pipeline definition step I have the following parameters...
def my_pipeline(bq_source_project: str = BQ_SOURCE_PROJECT,
                bq_source_dataset: str = BQ_SOURCE_DATASET,
                bq_source_table: str = BQ_SOURCE_TABLE,
                output_data_path: str = "crime_data.csv"):
My error was that when I ran the pipeline, I did not pass these same parameters. Below is the fixed version...
job = pipeline_jobs.PipelineJob(
    project=PROJECT_ID,
    location=LOCATION,
    display_name=PIPELINE_NAME,
    job_id=JOB_ID,
    template_path=FILENAME,
    pipeline_root=PIPELINE_ROOT,
    parameter_values={'bq_source_project': BQ_SOURCE_PROJECT,
                      'bq_source_dataset': BQ_SOURCE_DATASET,
                      'bq_source_table': BQ_SOURCE_TABLE}
)

Using python and suds, data not read by server side because element is not defined as an array

I am a very inexperienced programmer with no formal education. Details will be extremely helpful in any responses.
I have made several basic python scripts to call SOAP APIs, but I am running into an issue with a specific API function that has an embedded array.
Here is a sample excerpt from a working XML format to show nested data:
<bomData xsi:type="urn:inputBOM" SOAP-ENC:arrayType="urn:bomItem[]">
    <bomItem>
        <item_partnum></item_partnum>
        <item_partrev></item_partrev>
        <item_serial></item_serial>
        <item_lotnum></item_lotnum>
        <item_sublotnum></item_sublotnum>
        <item_qty></item_qty>
    </bomItem>
    <bomItem>
        <item_partnum></item_partnum>
        <item_partrev></item_partrev>
        <item_serial></item_serial>
        <item_lotnum></item_lotnum>
        <item_sublotnum></item_sublotnum>
        <item_qty></item_qty>
    </bomItem>
</bomData>
I have tried 3 different things to get this to work to no avail.
I can generate near-exact XML from my script, but a key attribute is missing: the 'SOAP-ENC:arrayType="urn:bomItem[]"' in the XML example above.
Option 1 was using MessagePlugin, but I get an error because my section is the third element, and the plugin always injects into the first element. I have tried body[2], but this throws an error.
For option 2, I tried to create the object(?). I read a lot of Stack Overflow, but I might be missing something for this.
Option 3 looked simple enough, but also failed: I tried setting the values in the JSON directly. I derived these examples by converting an XML sample to JSON.
I have also done several other minor things to try to get it working, but they are not worth mentioning. Although, if there is a way to somehow do the following, then I'm all ears:
bomItem[]: bomData = {"bomItem"[{...,...,...}]}
Here is a sample of my script:
# for python 3
# using pip install suds-py3
from suds.client import Client
from suds.plugin import MessagePlugin

# Config
# option 1: trying to set it as an array using plugin
class MyPlugin(MessagePlugin):
    def marshalled(self, context):
        body = context.envelope.getChild('Body')
        bomItem = body[0]
        bomItem.set('SOAP-ENC:arrayType', 'urn:bomItem[]')

URL = "http://localhost/application/soap?wsdl"
client = Client(URL, plugins=[MyPlugin()])

transact_info = {
    "username": "",
    "transaction": "",
    "workorder": "",
    "serial": "",
    "trans_qty": "",
    "seqnum": "",
    "opcode": "",
    "warehouseloc": "",
    "warehousebin": "",
    "machine_id": "",
    "comment": "",
    "defect_code": ""
}

# WIP - trying to get bomData below working first
inputData = {
    "dataItem": [
        {
            "fieldname": "",
            "fielddata": ""
        }
    ]
}

# option 2: trying to create the element here and define as an array
#inputbom = client.factory.create('ns3:inputBOM')
#inputbom._type = "SOAP-ENC:arrayType"
#inputbom.value = "urn:bomItem[]"

bomData = {
    # Option 3: trying to set the type and array type in JSON
    #"#xsi:type": "urn:inputBOM",
    #"#SOAP-ENC:arrayType": "urn:bomItem[]",
    "bomItem": [
        {
            "item_partnum": "",
            "item_partrev": "",
            "item_serial": "",
            "item_lotnum": "",
            "item_sublotnum": "",
            "item_qty": ""
        },
        {
            "item_partnum": "",
            "item_partrev": "",
            "item_serial": "",
            "item_lotnum": "",
            "item_sublotnum": "",
            "item_qty": ""
        }
    ]
}

try:
    response = client.service.transactUnit(transact_info, inputData, bomData)
    print("RESPONSE: ")
    print(response)
    #print(client)
    #print(envelope)
except Exception as e:
    # handle error here
    print(e)
I appreciate any help and hope it is easy to solve.
I have found the answer I was looking for. At least a working solution.
In any case, option 1 worked out. I read up on it at the following link:
https://suds-py3.readthedocs.io/en/latest/
You can review the 'MessagePlugin' section there.
I found a solution to get message plugin working from the following post:
unmarshalling Error: For input string: ""
A user posted an example how to crawl through the XML structure and modify it.
Here is my modified example to get my script working:
# Using MessagePlugin to modify elements before sending to server
class MyPlugin(MessagePlugin):
    # created method that could be reused to modify sections with similar
    # structure/requirements
    def addArrayType(self, dataType, arrayType, transactUnit):
        # this is the code that is key to crawling through the XML - I get
        # the child of each parent element until I am at the right level for
        # modification
        data = transactUnit.getChild(dataType)
        if data:
            data.set('SOAP-ENC:arrayType', arrayType)

    def marshalled(self, context):
        # Alter the envelope so that the xsd namespace is allowed
        context.envelope.nsprefixes['xsd'] = 'http://www.w3.org/2001/XMLSchema'
        body = context.envelope.getChild('Body')
        transactUnit = body.getChild("transactUnit")
        if transactUnit:
            self.addArrayType('inputData', 'urn:dataItem[]', transactUnit)
            self.addArrayType('bomData', 'urn:bomItem[]', transactUnit)
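For completeness, the plugin is hooked into the client exactly as in the question; a short sketch reusing the names defined above:
# Register the plugin so marshalled() runs before each request is sent
client = Client(URL, plugins=[MyPlugin()])
response = client.service.transactUnit(transact_info, inputData, bomData)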

ModelUploadOp step failing with custom prediction container

I am currently trying to deploy a Vertex pipeline to achieve the following:
1. Train a custom model (from a custom training python package) and dump the model artifacts (the trained model and the data preprocessor that will be used at prediction time). This step is working fine, as I can see new resources being created in the storage bucket.
2. Create a model resource via ModelUploadOp. This step fails for some reason when specifying serving_container_environment_variables and serving_container_ports, with the error message in the errors section below. This is somewhat surprising, as they are both needed by the prediction container, and environment variables are passed as a dict as specified in the documentation. This step works just fine using gcloud commands:
gcloud ai models upload \
    --region us-west1 \
    --display-name session_model_latest \
    --container-image-uri gcr.io/and-reporting/pred:latest \
    --container-env-vars="MODEL_BUCKET=ml_session_model" \
    --container-health-route=//health \
    --container-predict-route=//predict \
    --container-ports=5000
3. Create an endpoint.
4. Deploy the model to the endpoint.
There is clearly something I am getting wrong with Vertex; the components documentation doesn't help much in this case.
Pipeline
from datetime import datetime

import kfp
from google.cloud import aiplatform
from google_cloud_pipeline_components import aiplatform as gcc_aip
from kfp.v2 import compiler

PIPELINE_ROOT = "gs://ml_model_bucket/pipeline_root"

@kfp.dsl.pipeline(name="session-train-deploy", pipeline_root=PIPELINE_ROOT)
def pipeline():
    training_op = gcc_aip.CustomPythonPackageTrainingJobRunOp(
        project="my-project",
        location="us-west1",
        display_name="train_session_model",
        model_display_name="session_model",
        service_account="name@my-project.iam.gserviceaccount.com",
        environment_variables={"MODEL_BUCKET": "ml_session_model"},
        python_module_name="trainer.train",
        staging_bucket="gs://ml_model_bucket/",
        base_output_dir="gs://ml_model_bucket/",
        args=[
            "--gcs-data-path",
            "gs://ml_model_data/2019-Oct_short.csv",
            "--gcs-model-path",
            "gs://ml_model_bucket/model/model.joblib",
            "--gcs-preproc-path",
            "gs://ml_model_bucket/model/preproc.pkl",
        ],
        container_uri="us-docker.pkg.dev/vertex-ai/training/scikit-learn-cpu.0-23:latest",
        python_package_gcs_uri="gs://ml_model_bucket/trainer-0.0.1.tar.gz",
        model_serving_container_image_uri="gcr.io/my-project/pred",
        model_serving_container_predict_route="/predict",
        model_serving_container_health_route="/health",
        model_serving_container_ports=[5000],
        model_serving_container_environment_variables={
            "MODEL_BUCKET": "ml_model_bucket/model"
        },
    )
    model_upload_op = gcc_aip.ModelUploadOp(
        project="and-reporting",
        location="us-west1",
        display_name="session_model",
        serving_container_image_uri="gcr.io/my-project/pred:latest",
        # When passing the following 2 arguments this step fails...
        serving_container_environment_variables={"MODEL_BUCKET": "ml_model_bucket/model"},
        serving_container_ports=[5000],
        serving_container_predict_route="/predict",
        serving_container_health_route="/health",
    )
    model_upload_op.after(training_op)
    endpoint_create_op = gcc_aip.EndpointCreateOp(
        project="my-project",
        location="us-west1",
        display_name="pipeline_endpoint",
    )
    model_deploy_op = gcc_aip.ModelDeployOp(
        model=model_upload_op.outputs["model"],
        endpoint=endpoint_create_op.outputs["endpoint"],
        deployed_model_display_name="session_model",
        traffic_split={"0": 100},
        service_account="name@my-project.iam.gserviceaccount.com",
    )
    model_deploy_op.after(endpoint_create_op)

if __name__ == "__main__":
    ts = datetime.now().strftime("%Y%m%d%H%M%S")
    compiler.Compiler().compile(pipeline, "custom_train_pipeline.json")
    pipeline_job = aiplatform.PipelineJob(
        display_name="session_train_and_deploy",
        template_path="custom_train_pipeline.json",
        job_id=f"session-custom-pipeline-{ts}",
        enable_caching=True,
    )
    pipeline_job.submit()
Errors and notes
When specifying serving_container_environment_variables and serving_container_ports the step fails with the following error:
{'code': 400, 'message': 'Invalid JSON payload received. Unknown name "MODEL_BUCKET" at \'model.container_spec.env[0]\': Cannot find field.\nInvalid value at \'model.container_spec.ports[0]\' (type.googleapis.com/google.cloud.aiplatform.v1.Port), 5000', 'status': 'INVALID_ARGUMENT', 'details': [{'#type': 'type.googleapis.com/google.rpc.BadRequest', 'fieldViolations': [{'field': 'model.container_spec.env[0]', 'description': 'Invalid JSON payload received. Unknown name "MODEL_BUCKET" at \'model.container_spec.env[0]\': Cannot find field.'}, {'field': 'model.container_spec.ports[0]', 'description': "Invalid value at 'model.container_spec.ports[0]' (type.googleapis.com/google.cloud.aiplatform.v1.Port), 5000"}]}]}
When commenting out serving_container_environment_variables and serving_container_ports, the model resource gets created, but deploying it manually to the endpoint results in a failed deployment with no output logs.
After some time researching the problem, I stumbled upon this GitHub issue. The problem originated from a mismatch between the google_cloud_pipeline_components and Kubernetes API docs. In this case, serving_container_environment_variables is typed as an Optional[dict[str, str]] whereas it should have been typed as an Optional[list[dict[str, str]]]. A similar mismatch exists for the serving_container_ports argument as well. Passing the arguments following the Kubernetes documentation did the trick:
model_upload_op = gcc_aip.ModelUploadOp(
    project="my-project",
    location="us-west1",
    display_name="session_model",
    serving_container_image_uri="gcr.io/my-project/pred:latest",
    serving_container_environment_variables=[
        {"name": "MODEL_BUCKET", "value": "ml_session_model"}
    ],
    serving_container_ports=[{"containerPort": 5000}],
    serving_container_predict_route="/predict",
    serving_container_health_route="/health",
)

Add template popup to GeoJSON component with Python Dash-Leaflet

I am a complete beginner in programming and Python. I have a map application built with dash-leaflet, with several (~10) GeoJSON files included via the dl.GeoJSON component. I would like to show a popup with all the properties of each file. Before dl.GeoJSON was implemented, I used to create my layers by reading my geojson and defining the popup like this:
def compute_geojson(gjson):
    geojson = json.load(open(gjson["path"], encoding='utf8'))
    if 'Polygon' in geojson["features"][0]["geometry"]["type"]:
        data = [
            dl.Polygon(
                positions=get_geom(feat),
                children=[
                    dl.Popup([html.P(k + " : " + str(v)) for k, v in feat["properties"].items()], maxHeight=300),
                ],
                color=get_color(gjson, feat), weight=0.2, fillOpacity=gjson["opacity"], stroke=True
            ) for feat in geojson['features']
        ]
        ...
I would like to do this for all my geojson files (which have different structures) with the dl.GeoJSON component, because it should render faster than my method. Is it possible? I tried some javascript with onEachFeature but didn't succeed.
Thanks
The simplest solution would be to add a property named popup with the desired popup content, as the GeoJSON component will render it as a popup automatically:
import dash_leaflet as dl
import dash_leaflet.express as dlx
data = dlx.dicts_to_geojson([dict(lat=-37.8, lon=175.6, popup="I am a popup")])
geojson = dl.GeoJSON(data=data)
If you need more customization options and/or prefer not to add properties (e.g. for performance reasons), you would need to implement a custom onEachFeature function. If you create a .js file in your assets folder with content like,
window.someNamespace = Object.assign({}, window.someNamespace, {
    someSubNamespace: {
        bindPopup: function(feature, layer) {
            const props = feature.properties;
            delete props.cluster;
            layer.bindPopup(JSON.stringify(props))
        }
    }
});
you can bind the function like this,
import dash_leaflet as dl
from dash_extensions.javascript import Namespace
ns = Namespace("someNamespace", "someSubNamespace")
geojson = dl.GeoJSON(data=data, options=dict(onEachFeature=ns("bindPopup")))
In the above code examples I am using dash-leaflet==0.1.10 and dash-extensions==0.0.33.
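For anyone reproducing this, here is a minimal sketch of dropping such a layer into a Dash app; the layout and map settings are my own assumptions, matching the package versions mentioned above:
import dash
import dash_html_components as html
import dash_leaflet as dl
import dash_leaflet.express as dlx

# Same demo data as above: a single point whose 'popup' property is rendered automatically
data = dlx.dicts_to_geojson([dict(lat=-37.8, lon=175.6, popup="I am a popup")])

app = dash.Dash()
app.layout = html.Div([
    dl.Map([dl.TileLayer(), dl.GeoJSON(data=data)],
           center=(-37.8, 175.6), zoom=10,
           style={'width': '100%', 'height': '50vh'}),
])

if __name__ == '__main__':
    app.run_server()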

BERT tokenizer & model download

I'm a beginner. I'm working with BERT. However, due to the security of the company network, the following code cannot download the BERT model directly.
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased', do_lower_case=False)
model = BertForSequenceClassification.from_pretrained("bert-base-multilingual-cased", num_labels=2)
So I think I have to download these files and point to their location manually.
But I'm new to this, and I'm wondering if it's as simple as downloading the files (e.g. the .py ones) from GitHub and putting them in a location.
I'm currently using the BERT model implemented in Hugging Face's PyTorch library, and the address of the source repository I found is:
https://github.com/huggingface/transformers
Please let me know if the method I have in mind is correct, and if so, which files to get.
Thanks in advance for any comments.
As described here, what you need to do is download the pretrained weights and the configs, then put them in the same folder. Every model has a pair of links; you might want to take a look at the lib code.
For instance:
import torch
from transformers import *
model = BertModel.from_pretrained('/Users/yourname/workplace/berts/')
where /Users/yourname/workplace/berts/ refers to your folder.
Below is what I found.
At src/transformers/configuration_bert.py there is a list of the models' configs:
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "bert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json",
    "bert-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-config.json",
    "bert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-config.json",
    "bert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-config.json",
    "bert-base-multilingual-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-config.json",
    "bert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-config.json",
    "bert-base-chinese": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-config.json",
    "bert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-cased-config.json",
    "bert-large-uncased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-config.json",
    "bert-large-cased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-config.json",
    "bert-large-uncased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-config.json",
    "bert-large-cased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-config.json",
    "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-config.json",
    "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-config.json",
    "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-config.json",
    "bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-config.json",
    "bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking-config.json",
    "bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-config.json",
    "bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-config.json",
    "bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/config.json",
    "bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/config.json",
}
and at src/transformers/modeling_bert.py there are links to the pretrained weights:
BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
    "bert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin",
    "bert-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-pytorch_model.bin",
    "bert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-pytorch_model.bin",
    "bert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-pytorch_model.bin",
    "bert-base-multilingual-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-pytorch_model.bin",
    "bert-base-multilingual-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-pytorch_model.bin",
    "bert-base-chinese": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-pytorch_model.bin",
    "bert-base-german-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-cased-pytorch_model.bin",
    "bert-large-uncased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-pytorch_model.bin",
    "bert-large-cased-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-pytorch_model.bin",
    "bert-large-uncased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-pytorch_model.bin",
    "bert-large-cased-whole-word-masking-finetuned-squad": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-pytorch_model.bin",
    "bert-base-cased-finetuned-mrpc": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-pytorch_model.bin",
    "bert-base-german-dbmdz-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-pytorch_model.bin",
    "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-pytorch_model.bin",
    "bert-base-japanese": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-pytorch_model.bin",
    "bert-base-japanese-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking-pytorch_model.bin",
    "bert-base-japanese-char": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-pytorch_model.bin",
    "bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-pytorch_model.bin",
    "bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/pytorch_model.bin",
    "bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/pytorch_model.bin",
}
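Putting the two maps together, here is a hedged sketch of the offline workflow (assuming you can copy files onto the machine by other means; the folder path is a placeholder, and the file names are the defaults from_pretrained() looks for in this era of transformers):
from transformers import BertTokenizer, BertForSequenceClassification

local_dir = '/Users/yourname/workplace/berts/'  # should contain:
#   config.json        (downloaded from BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, renamed)
#   pytorch_model.bin  (downloaded from BERT_PRETRAINED_MODEL_ARCHIVE_MAP, renamed)
#   vocab.txt          (the matching vocabulary file for the model)

tokenizer = BertTokenizer.from_pretrained(local_dir, do_lower_case=False)
model = BertForSequenceClassification.from_pretrained(local_dir, num_labels=2)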
