diff --git a/Python_DE_Lateral/DE_Python_Lateral_Training.ipynb b/Python_DE_Lateral/DE_Python_Lateral_Training.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..698ff9dc9c4ac80c385dc8ea7282e2effa68e2f8 --- /dev/null +++ b/Python_DE_Lateral/DE_Python_Lateral_Training.ipynb @@ -0,0 +1,701 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "B8UEjjSVD-OY" + }, + "source": [ + "## Installing required packages\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "collapsed": true, + "id": "W9jBeoQGPX13", + "jupyter": { + "outputs_hidden": true + }, + "outputId": "9a6efed1-0638-441a-af41-73d7a5b8b8ce" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting apache-beam\n", + " Downloading apache_beam-2.59.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.5 kB)\n", + "Collecting crcmod<2.0,>=1.7 (from apache-beam)\n", + " Downloading crcmod-1.7.tar.gz (89 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.7/89.7 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting orjson<4,>=3.9.7 (from apache-beam)\n", + " Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m654.1 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting dill<0.3.2,>=0.3.1.1 (from apache-beam)\n", + " Downloading dill-0.3.1.1.tar.gz (151 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m152.0/152.0 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: cloudpickle~=2.2.1 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (2.2.1)\n", + "Collecting fastavro<2,>=0.23.6 (from apache-beam)\n", + " Downloading fastavro-1.9.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)\n", + "Collecting fasteners<1.0,>=0.3 (from apache-beam)\n", + " Downloading fasteners-0.19-py3-none-any.whl.metadata (4.9 kB)\n", + "Requirement already satisfied: grpcio!=1.48.0,!=1.59.*,!=1.60.*,!=1.61.*,!=1.62.0,!=1.62.1,<2,>=1.33.1 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (1.64.1)\n", + "Collecting hdfs<3.0.0,>=2.1.0 (from apache-beam)\n", + " Downloading hdfs-2.7.3.tar.gz (43 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.5/43.5 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: httplib2<0.23.0,>=0.8 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (0.22.0)\n", + "Requirement already satisfied: jsonschema<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (4.23.0)\n", + "Requirement already satisfied: jsonpickle<4.0.0,>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (3.3.0)\n", + "Requirement already satisfied: numpy<1.27.0,>=1.14.3 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (1.26.4)\n", + "Collecting objsize<0.8.0,>=0.6.1 (from apache-beam)\n", + " Downloading objsize-0.7.0-py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: packaging>=22.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (24.1)\n", + "Collecting pymongo<5.0.0,>=3.8.0 (from apache-beam)\n", + " Downloading pymongo-4.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)\n", + "Requirement already satisfied: proto-plus<2,>=1.7.1 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (1.24.0)\n", + "Requirement already satisfied: protobuf!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*,<4.26.0,>=3.20.3 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (3.20.3)\n", + "Collecting pydot<2,>=1.2.0 (from apache-beam)\n", + " Downloading pydot-1.4.2-py2.py3-none-any.whl.metadata (8.0 kB)\n", + "Requirement already satisfied: python-dateutil<3,>=2.8.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (2.8.2)\n", + "Requirement already satisfied: pytz>=2018.3 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (2024.2)\n", + "Collecting redis<6,>=5.0.0 (from apache-beam)\n", + " Downloading redis-5.1.0-py3-none-any.whl.metadata (9.1 kB)\n", + "Requirement already satisfied: regex>=2020.6.8 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (2024.9.11)\n", + "Requirement already satisfied: requests<3.0.0,>=2.24.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (2.32.3)\n", + "Requirement already satisfied: typing-extensions>=3.7.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (4.12.2)\n", + "Collecting zstandard<1,>=0.18.0 (from apache-beam)\n", + " Downloading zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)\n", + "Requirement already satisfied: pyarrow<17.0.0,>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (16.1.0)\n", + "Requirement already satisfied: pyarrow-hotfix<1 in /usr/local/lib/python3.10/dist-packages (from apache-beam) (0.6)\n", + "Collecting js2py<1,>=0.74 (from apache-beam)\n", + " Downloading Js2Py-0.74-py3-none-any.whl.metadata (868 bytes)\n", + "Collecting docopt (from hdfs<3.0.0,>=2.1.0->apache-beam)\n", + " Downloading docopt-0.6.2.tar.gz (25 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from hdfs<3.0.0,>=2.1.0->apache-beam) (1.16.0)\n", + "Requirement already satisfied: pyparsing!=3.0.0,!=3.0.1,!=3.0.2,!=3.0.3,<4,>=2.4.2 in /usr/local/lib/python3.10/dist-packages (from httplib2<0.23.0,>=0.8->apache-beam) (3.1.4)\n", + "Requirement already satisfied: tzlocal>=1.2 in /usr/local/lib/python3.10/dist-packages (from js2py<1,>=0.74->apache-beam) (5.2)\n", + "Collecting pyjsparser>=2.5.1 (from js2py<1,>=0.74->apache-beam)\n", + " Downloading pyjsparser-2.7.1.tar.gz (24 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema<5.0.0,>=4.0.0->apache-beam) (24.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema<5.0.0,>=4.0.0->apache-beam) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema<5.0.0,>=4.0.0->apache-beam) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema<5.0.0,>=4.0.0->apache-beam) (0.20.0)\n", + "Collecting dnspython<3.0.0,>=1.16.0 (from pymongo<5.0.0,>=3.8.0->apache-beam)\n", + " Downloading dnspython-2.6.1-py3-none-any.whl.metadata (5.8 kB)\n", + "Requirement already satisfied: async-timeout>=4.0.3 in /usr/local/lib/python3.10/dist-packages (from redis<6,>=5.0.0->apache-beam) (4.0.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.24.0->apache-beam) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.24.0->apache-beam) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.24.0->apache-beam) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.24.0->apache-beam) (2024.8.30)\n", + "Downloading apache_beam-2.59.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.6/15.6 MB\u001b[0m \u001b[31m67.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fastavro-1.9.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m62.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fasteners-0.19-py3-none-any.whl (18 kB)\n", + "Downloading Js2Py-0.74-py3-none-any.whl (1.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m42.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading objsize-0.7.0-py3-none-any.whl (11 kB)\n", + "Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pydot-1.4.2-py2.py3-none-any.whl (21 kB)\n", + "Downloading pymongo-4.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m57.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading redis-5.1.0-py3-none-any.whl (261 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m261.2/261.2 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m66.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dnspython-2.6.1-py3-none-any.whl (307 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m20.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: crcmod, dill, hdfs, pyjsparser, docopt\n", + " Building wheel for crcmod (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for crcmod: filename=crcmod-1.7-cp310-cp310-linux_x86_64.whl size=31404 sha256=47dc3451b06ac3201ab1bc72b40f38c2fbdc7a4ec7f4eb16809171c7f5ad680c\n", + " Stored in directory: /root/.cache/pip/wheels/85/4c/07/72215c529bd59d67e3dac29711d7aba1b692f543c808ba9e86\n", + " Building wheel for dill (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for dill: filename=dill-0.3.1.1-py3-none-any.whl size=78542 sha256=3f3c64766dc34730bb2ab29a10ac85c747587342b2b6656123c8437cce8a281a\n", + " Stored in directory: /root/.cache/pip/wheels/ea/e2/86/64980d90e297e7bf2ce588c2b96e818f5399c515c4bb8a7e4f\n", + " Building wheel for hdfs (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for hdfs: filename=hdfs-2.7.3-py3-none-any.whl size=34325 sha256=8d1a31bca252ca0170842d140abbdb3c77a75e8f4bdb7207c94b632cfc758434\n", + " Stored in directory: /root/.cache/pip/wheels/e5/8d/b6/99c1c0a3ac5788c866b0ecd3f48b0134a5910e6ed26011800b\n", + " Building wheel for pyjsparser (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for pyjsparser: filename=pyjsparser-2.7.1-py3-none-any.whl size=25983 sha256=6be8e58dba3902a3e4f5850f4093cac8357a584f79626762b99823018c157c06\n", + " Stored in directory: /root/.cache/pip/wheels/5e/81/26/5956478df303e2bf5a85a5df595bb307bd25948a4bab69f7c7\n", + " Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13704 sha256=4f8d8d3fa9fa3f5190772934989f590858526810e7919cbe546dc5753b5701ec\n", + " Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac\n", + "Successfully built crcmod dill hdfs pyjsparser docopt\n", + "Installing collected packages: pyjsparser, docopt, crcmod, zstandard, redis, pydot, orjson, objsize, js2py, fasteners, fastavro, dnspython, dill, pymongo, hdfs, apache-beam\n", + " Attempting uninstall: pydot\n", + " Found existing installation: pydot 3.0.2\n", + " Uninstalling pydot-3.0.2:\n", + " Successfully uninstalled pydot-3.0.2\n", + "Successfully installed apache-beam-2.59.0 crcmod-1.7 dill-0.3.1.1 dnspython-2.6.1 docopt-0.6.2 fastavro-1.9.7 fasteners-0.19 hdfs-2.7.3 js2py-0.74 objsize-0.7.0 orjson-3.10.7 pydot-1.4.2 pyjsparser-2.7.1 pymongo-4.10.1 redis-5.1.0 zstandard-0.23.0\n", + "Collecting google-cloud-storage\n", + " Downloading google_cloud_storage-2.18.2-py2.py3-none-any.whl.metadata (9.1 kB)\n", + "Collecting google-auth<3.0dev,>=2.26.1 (from google-cloud-storage)\n", + " Downloading google_auth-2.35.0-py2.py3-none-any.whl.metadata (4.7 kB)\n", + "Collecting google-api-core<3.0.0dev,>=2.15.0 (from google-cloud-storage)\n", + " Downloading google_api_core-2.20.0-py3-none-any.whl.metadata (2.7 kB)\n", + "Collecting google-cloud-core<3.0dev,>=2.3.0 (from google-cloud-storage)\n", + " Using cached google_cloud_core-2.4.1-py2.py3-none-any.whl.metadata (2.7 kB)\n", + "Collecting google-resumable-media>=2.7.2 (from google-cloud-storage)\n", + " Using cached google_resumable_media-2.7.2-py2.py3-none-any.whl.metadata (2.2 kB)\n", + "Collecting requests<3.0.0dev,>=2.18.0 (from google-cloud-storage)\n", + " Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\n", + "Collecting google-crc32c<2.0dev,>=1.0 (from google-cloud-storage)\n", + " Using cached google_crc32c-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)\n", + "Collecting googleapis-common-protos<2.0.dev0,>=1.56.2 (from google-api-core<3.0.0dev,>=2.15.0->google-cloud-storage)\n", + " Using cached googleapis_common_protos-1.65.0-py2.py3-none-any.whl.metadata (1.5 kB)\n", + "Collecting protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0.dev0,>=3.19.5 (from google-api-core<3.0.0dev,>=2.15.0->google-cloud-storage)\n", + " Downloading protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)\n", + "Collecting proto-plus<2.0.0dev,>=1.22.3 (from google-api-core<3.0.0dev,>=2.15.0->google-cloud-storage)\n", + " Using cached proto_plus-1.24.0-py3-none-any.whl.metadata (2.2 kB)\n", + "Collecting cachetools<6.0,>=2.0.0 (from google-auth<3.0dev,>=2.26.1->google-cloud-storage)\n", + " Using cached cachetools-5.5.0-py3-none-any.whl.metadata (5.3 kB)\n", + "Collecting pyasn1-modules>=0.2.1 (from google-auth<3.0dev,>=2.26.1->google-cloud-storage)\n", + " Using cached pyasn1_modules-0.4.1-py3-none-any.whl.metadata (3.5 kB)\n", + "Collecting rsa<5,>=3.1.4 (from google-auth<3.0dev,>=2.26.1->google-cloud-storage)\n", + " Using cached rsa-4.9-py3-none-any.whl.metadata (4.2 kB)\n", + "Collecting charset-normalizer<4,>=2 (from requests<3.0.0dev,>=2.18.0->google-cloud-storage)\n", + " Using cached charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (33 kB)\n", + "Collecting idna<4,>=2.5 (from requests<3.0.0dev,>=2.18.0->google-cloud-storage)\n", + " Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)\n", + "Collecting urllib3<3,>=1.21.1 (from requests<3.0.0dev,>=2.18.0->google-cloud-storage)\n", + " Using cached urllib3-2.2.3-py3-none-any.whl.metadata (6.5 kB)\n", + "Collecting certifi>=2017.4.17 (from requests<3.0.0dev,>=2.18.0->google-cloud-storage)\n", + " Using cached certifi-2024.8.30-py3-none-any.whl.metadata (2.2 kB)\n", + "Collecting pyasn1<0.7.0,>=0.4.6 (from pyasn1-modules>=0.2.1->google-auth<3.0dev,>=2.26.1->google-cloud-storage)\n", + " Using cached pyasn1-0.6.1-py3-none-any.whl.metadata (8.4 kB)\n", + "Downloading google_cloud_storage-2.18.2-py2.py3-none-any.whl (130 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.5/130.5 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_api_core-2.20.0-py3-none-any.whl (142 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.2/142.2 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_auth-2.35.0-py2.py3-none-any.whl (208 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.0/209.0 kB\u001b[0m \u001b[31m15.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached google_cloud_core-2.4.1-py2.py3-none-any.whl (29 kB)\n", + "Using cached google_crc32c-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37 kB)\n", + "Using cached google_resumable_media-2.7.2-py2.py3-none-any.whl (81 kB)\n", + "Using cached requests-2.32.3-py3-none-any.whl (64 kB)\n", + "Using cached cachetools-5.5.0-py3-none-any.whl (9.5 kB)\n", + "Using cached certifi-2024.8.30-py3-none-any.whl (167 kB)\n", + "Using cached charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n", + "Using cached googleapis_common_protos-1.65.0-py2.py3-none-any.whl (220 kB)\n", + "Using cached idna-3.10-py3-none-any.whl (70 kB)\n", + "Using cached proto_plus-1.24.0-py3-none-any.whl (50 kB)\n", + "Downloading protobuf-5.28.2-cp38-abi3-manylinux2014_x86_64.whl (316 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m316.6/316.6 kB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached pyasn1_modules-0.4.1-py3-none-any.whl (181 kB)\n", + "Using cached rsa-4.9-py3-none-any.whl (34 kB)\n", + "Using cached urllib3-2.2.3-py3-none-any.whl (126 kB)\n", + "Using cached pyasn1-0.6.1-py3-none-any.whl (83 kB)\n", + "Installing collected packages: urllib3, pyasn1, protobuf, idna, google-crc32c, charset-normalizer, certifi, cachetools, rsa, requests, pyasn1-modules, proto-plus, googleapis-common-protos, google-resumable-media, google-auth, google-api-core, google-cloud-core, google-cloud-storage\n", + " Attempting uninstall: urllib3\n", + " Found existing installation: urllib3 2.2.3\n", + " Uninstalling urllib3-2.2.3:\n", + " Successfully uninstalled urllib3-2.2.3\n", + " Attempting uninstall: pyasn1\n", + " Found existing installation: pyasn1 0.6.1\n", + " Uninstalling pyasn1-0.6.1:\n", + " Successfully uninstalled pyasn1-0.6.1\n", + " Attempting uninstall: protobuf\n", + " Found existing installation: protobuf 3.20.3\n", + " Uninstalling protobuf-3.20.3:\n", + " Successfully uninstalled protobuf-3.20.3\n", + " Attempting uninstall: idna\n", + " Found existing installation: idna 3.10\n", + " Uninstalling idna-3.10:\n", + " Successfully uninstalled idna-3.10\n", + " Attempting uninstall: google-crc32c\n", + " Found existing installation: google-crc32c 1.6.0\n", + " Uninstalling google-crc32c-1.6.0:\n", + " Successfully uninstalled google-crc32c-1.6.0\n", + " Attempting uninstall: charset-normalizer\n", + " Found existing installation: charset-normalizer 3.3.2\n", + " Uninstalling charset-normalizer-3.3.2:\n", + " Successfully uninstalled charset-normalizer-3.3.2\n", + " Attempting uninstall: certifi\n", + " Found existing installation: certifi 2024.8.30\n", + " Uninstalling certifi-2024.8.30:\n", + " Successfully uninstalled certifi-2024.8.30\n", + " Attempting uninstall: cachetools\n", + " Found existing installation: cachetools 5.5.0\n", + " Uninstalling cachetools-5.5.0:\n", + " Successfully uninstalled cachetools-5.5.0\n", + " Attempting uninstall: rsa\n", + " Found existing installation: rsa 4.9\n", + " Uninstalling rsa-4.9:\n", + " Successfully uninstalled rsa-4.9\n", + " Attempting uninstall: requests\n", + " Found existing installation: requests 2.32.3\n", + " Uninstalling requests-2.32.3:\n", + " Successfully uninstalled requests-2.32.3\n", + " Attempting uninstall: pyasn1-modules\n", + " Found existing installation: pyasn1_modules 0.4.1\n", + " Uninstalling pyasn1_modules-0.4.1:\n", + " Successfully uninstalled pyasn1_modules-0.4.1\n", + " Attempting uninstall: proto-plus\n", + " Found existing installation: proto-plus 1.24.0\n", + " Uninstalling proto-plus-1.24.0:\n", + " Successfully uninstalled proto-plus-1.24.0\n", + " Attempting uninstall: googleapis-common-protos\n", + " Found existing installation: googleapis-common-protos 1.65.0\n", + " Uninstalling googleapis-common-protos-1.65.0:\n", + " Successfully uninstalled googleapis-common-protos-1.65.0\n", + " Attempting uninstall: google-resumable-media\n", + " Found existing installation: google-resumable-media 2.7.2\n", + " Uninstalling google-resumable-media-2.7.2:\n", + " Successfully uninstalled google-resumable-media-2.7.2\n", + " Attempting uninstall: google-auth\n", + " Found existing installation: google-auth 2.27.0\n", + " Uninstalling google-auth-2.27.0:\n", + " Successfully uninstalled google-auth-2.27.0\n", + " Attempting uninstall: google-api-core\n", + " Found existing installation: google-api-core 2.19.2\n", + " Uninstalling google-api-core-2.19.2:\n", + " Successfully uninstalled google-api-core-2.19.2\n", + " Attempting uninstall: google-cloud-core\n", + " Found existing installation: google-cloud-core 2.4.1\n", + " Uninstalling google-cloud-core-2.4.1:\n", + " Successfully uninstalled google-cloud-core-2.4.1\n", + " Attempting uninstall: google-cloud-storage\n", + " Found existing installation: google-cloud-storage 2.8.0\n", + " Uninstalling google-cloud-storage-2.8.0:\n", + " Successfully uninstalled google-cloud-storage-2.8.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "apache-beam 2.59.0 requires protobuf!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*,<4.26.0,>=3.20.3, but you have protobuf 5.28.2 which is incompatible.\n", + "google-ai-generativelanguage 0.6.6 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 5.28.2 which is incompatible.\n", + "google-cloud-datastore 2.19.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 5.28.2 which is incompatible.\n", + "google-cloud-firestore 2.16.1 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 5.28.2 which is incompatible.\n", + "google-colab 1.0.0 requires google-auth==2.27.0, but you have google-auth 2.35.0 which is incompatible.\n", + "tensorboard 2.17.0 requires protobuf!=4.24.0,<5.0.0,>=3.19.6, but you have protobuf 5.28.2 which is incompatible.\n", + "tensorflow 2.17.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.28.2 which is incompatible.\n", + "tensorflow-metadata 1.15.0 requires protobuf<4.21,>=3.20.3; python_version < \"3.11\", but you have protobuf 5.28.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed cachetools-5.5.0 certifi-2024.8.30 charset-normalizer-3.3.2 google-api-core-2.20.0 google-auth-2.35.0 google-cloud-core-2.4.1 google-cloud-storage-2.18.2 google-crc32c-1.6.0 google-resumable-media-2.7.2 googleapis-common-protos-1.65.0 idna-3.10 proto-plus-1.24.0 protobuf-5.28.2 pyasn1-0.6.1 pyasn1-modules-0.4.1 requests-2.32.3 rsa-4.9 urllib3-2.2.3\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "certifi", + "google" + ] + }, + "id": "84ba0c331d1c470bbc9cd51ca426dedc" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: apache-beam[gcp] in /usr/local/lib/python3.10/dist-packages (2.59.0)\n", + "Requirement already satisfied: crcmod<2.0,>=1.7 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (1.7)\n", + "Requirement already satisfied: orjson<4,>=3.9.7 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (3.10.7)\n", + "Requirement already satisfied: dill<0.3.2,>=0.3.1.1 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (0.3.1.1)\n", + "Requirement already satisfied: cloudpickle~=2.2.1 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.2.1)\n", + "Requirement already satisfied: fastavro<2,>=0.23.6 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (1.9.7)\n", + "Requirement already satisfied: fasteners<1.0,>=0.3 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (0.19)\n", + "Requirement already satisfied: grpcio!=1.48.0,!=1.59.*,!=1.60.*,!=1.61.*,!=1.62.0,!=1.62.1,<2,>=1.33.1 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (1.64.1)\n", + "Requirement already satisfied: hdfs<3.0.0,>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.7.3)\n", + "Requirement already satisfied: httplib2<0.23.0,>=0.8 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (0.22.0)\n", + "Requirement already satisfied: jsonschema<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (4.23.0)\n", + "Requirement already satisfied: jsonpickle<4.0.0,>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (3.3.0)\n", + "Requirement already satisfied: numpy<1.27.0,>=1.14.3 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (1.26.4)\n", + "Requirement already satisfied: objsize<0.8.0,>=0.6.1 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (0.7.0)\n", + "Requirement already satisfied: packaging>=22.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (24.1)\n", + "Requirement already satisfied: pymongo<5.0.0,>=3.8.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (4.10.1)\n", + "Requirement already satisfied: proto-plus<2,>=1.7.1 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (1.24.0)\n", + "Collecting protobuf!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*,<4.26.0,>=3.20.3 (from apache-beam[gcp])\n", + " Downloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)\n", + "Requirement already satisfied: pydot<2,>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (1.4.2)\n", + "Requirement already satisfied: python-dateutil<3,>=2.8.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.8.2)\n", + "Requirement already satisfied: pytz>=2018.3 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2024.2)\n", + "Requirement already satisfied: redis<6,>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (5.1.0)\n", + "Requirement already satisfied: regex>=2020.6.8 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2024.9.11)\n", + "Requirement already satisfied: requests<3.0.0,>=2.24.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.32.3)\n", + "Requirement already satisfied: typing-extensions>=3.7.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (4.12.2)\n", + "Requirement already satisfied: zstandard<1,>=0.18.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (0.23.0)\n", + "Requirement already satisfied: pyarrow<17.0.0,>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (16.1.0)\n", + "Requirement already satisfied: pyarrow-hotfix<1 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (0.6)\n", + "Requirement already satisfied: js2py<1,>=0.74 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (0.74)\n", + "Requirement already satisfied: cachetools<6,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (5.5.0)\n", + "Requirement already satisfied: google-api-core<3,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.20.0)\n", + "Collecting google-apitools<0.5.32,>=0.5.31 (from apache-beam[gcp])\n", + " Downloading google-apitools-0.5.31.tar.gz (173 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m173.5/173.5 kB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: google-auth<3,>=1.18.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.35.0)\n", + "Requirement already satisfied: google-auth-httplib2<0.3.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (0.2.0)\n", + "Requirement already satisfied: google-cloud-datastore<3,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.19.0)\n", + "Requirement already satisfied: google-cloud-pubsub<3,>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.25.1)\n", + "Collecting google-cloud-pubsublite<2,>=1.2.0 (from apache-beam[gcp])\n", + " Downloading google_cloud_pubsublite-1.11.1-py2.py3-none-any.whl.metadata (5.6 kB)\n", + "Requirement already satisfied: google-cloud-storage<3,>=2.18.2 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.18.2)\n", + "Requirement already satisfied: google-cloud-bigquery<4,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (3.25.0)\n", + "Requirement already satisfied: google-cloud-bigquery-storage<3,>=2.6.3 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.26.0)\n", + "Requirement already satisfied: google-cloud-core<3,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.4.1)\n", + "Requirement already satisfied: google-cloud-bigtable<3,>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.26.0)\n", + "Collecting google-cloud-spanner<4,>=3.0.0 (from apache-beam[gcp])\n", + " Downloading google_cloud_spanner-3.49.1-py2.py3-none-any.whl.metadata (10 kB)\n", + "Collecting google-cloud-dlp<4,>=3.0.0 (from apache-beam[gcp])\n", + " Downloading google_cloud_dlp-3.23.0-py2.py3-none-any.whl.metadata (5.3 kB)\n", + "Requirement already satisfied: google-cloud-language<3,>=2.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (2.13.4)\n", + "Collecting google-cloud-videointelligence<3,>=2.0 (from apache-beam[gcp])\n", + " Downloading google_cloud_videointelligence-2.13.5-py2.py3-none-any.whl.metadata (5.7 kB)\n", + "Collecting google-cloud-vision<4,>=2 (from apache-beam[gcp])\n", + " Downloading google_cloud_vision-3.7.4-py2.py3-none-any.whl.metadata (5.2 kB)\n", + "Collecting google-cloud-recommendations-ai<0.11.0,>=0.1.0 (from apache-beam[gcp])\n", + " Downloading google_cloud_recommendations_ai-0.10.12-py2.py3-none-any.whl.metadata (5.3 kB)\n", + "Requirement already satisfied: google-cloud-aiplatform<2.0,>=1.26.0 in /usr/local/lib/python3.10/dist-packages (from apache-beam[gcp]) (1.68.0)\n", + "Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core<3,>=2.0.0->apache-beam[gcp]) (1.65.0)\n", + "Requirement already satisfied: oauth2client>=1.4.12 in /usr/local/lib/python3.10/dist-packages (from google-apitools<0.5.32,>=0.5.31->apache-beam[gcp]) (4.1.3)\n", + "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from google-apitools<0.5.32,>=0.5.31->apache-beam[gcp]) (1.16.0)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.18.0->apache-beam[gcp]) (0.4.1)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.18.0->apache-beam[gcp]) (4.9)\n", + "Requirement already satisfied: google-cloud-resource-manager<3.0.0dev,>=1.3.3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform<2.0,>=1.26.0->apache-beam[gcp]) (1.12.5)\n", + "Requirement already satisfied: shapely<3.0.0dev in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform<2.0,>=1.26.0->apache-beam[gcp]) (2.0.6)\n", + "Requirement already satisfied: pydantic<3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform<2.0,>=1.26.0->apache-beam[gcp]) (2.9.2)\n", + "Requirement already satisfied: docstring-parser<1 in /usr/local/lib/python3.10/dist-packages (from google-cloud-aiplatform<2.0,>=1.26.0->apache-beam[gcp]) (0.16)\n", + "Requirement already satisfied: google-resumable-media<3.0dev,>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery<4,>=2.0.0->apache-beam[gcp]) (2.7.2)\n", + "Requirement already satisfied: grpc-google-iam-v1<1.0.0dev,>=0.12.4 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigtable<3,>=2.19.0->apache-beam[gcp]) (0.13.1)\n", + "Requirement already satisfied: grpcio-status>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-cloud-pubsub<3,>=2.1.0->apache-beam[gcp]) (1.48.2)\n", + "Requirement already satisfied: opentelemetry-api>=1.27.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-pubsub<3,>=2.1.0->apache-beam[gcp]) (1.27.0)\n", + "Requirement already satisfied: opentelemetry-sdk>=1.27.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-pubsub<3,>=2.1.0->apache-beam[gcp]) (1.27.0)\n", + "Collecting overrides<8.0.0,>=6.0.1 (from google-cloud-pubsublite<2,>=1.2.0->apache-beam[gcp])\n", + " Downloading overrides-7.7.0-py3-none-any.whl.metadata (5.8 kB)\n", + "Requirement already satisfied: sqlparse>=0.4.4 in /usr/local/lib/python3.10/dist-packages (from google-cloud-spanner<4,>=3.0.0->apache-beam[gcp]) (0.5.1)\n", + "Collecting grpc-interceptor>=0.15.4 (from google-cloud-spanner<4,>=3.0.0->apache-beam[gcp])\n", + " Downloading grpc_interceptor-0.15.4-py3-none-any.whl.metadata (8.4 kB)\n", + "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-storage<3,>=2.18.2->apache-beam[gcp]) (1.6.0)\n", + "Requirement already satisfied: docopt in /usr/local/lib/python3.10/dist-packages (from hdfs<3.0.0,>=2.1.0->apache-beam[gcp]) (0.6.2)\n", + "Requirement already satisfied: pyparsing!=3.0.0,!=3.0.1,!=3.0.2,!=3.0.3,<4,>=2.4.2 in /usr/local/lib/python3.10/dist-packages (from httplib2<0.23.0,>=0.8->apache-beam[gcp]) (3.1.4)\n", + "Requirement already satisfied: tzlocal>=1.2 in /usr/local/lib/python3.10/dist-packages (from js2py<1,>=0.74->apache-beam[gcp]) (5.2)\n", + "Requirement already satisfied: pyjsparser>=2.5.1 in /usr/local/lib/python3.10/dist-packages (from js2py<1,>=0.74->apache-beam[gcp]) (2.7.1)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema<5.0.0,>=4.0.0->apache-beam[gcp]) (24.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema<5.0.0,>=4.0.0->apache-beam[gcp]) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema<5.0.0,>=4.0.0->apache-beam[gcp]) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema<5.0.0,>=4.0.0->apache-beam[gcp]) (0.20.0)\n", + "Requirement already satisfied: dnspython<3.0.0,>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from pymongo<5.0.0,>=3.8.0->apache-beam[gcp]) (2.6.1)\n", + "Requirement already satisfied: async-timeout>=4.0.3 in /usr/local/lib/python3.10/dist-packages (from redis<6,>=5.0.0->apache-beam[gcp]) (4.0.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.24.0->apache-beam[gcp]) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.24.0->apache-beam[gcp]) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.24.0->apache-beam[gcp]) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.24.0->apache-beam[gcp]) (2024.8.30)\n", + "Requirement already satisfied: pyasn1>=0.1.7 in /usr/local/lib/python3.10/dist-packages (from oauth2client>=1.4.12->google-apitools<0.5.32,>=0.5.31->apache-beam[gcp]) (0.6.1)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api>=1.27.0->google-cloud-pubsub<3,>=2.1.0->apache-beam[gcp]) (1.2.14)\n", + "Requirement already satisfied: importlib-metadata<=8.4.0,>=6.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api>=1.27.0->google-cloud-pubsub<3,>=2.1.0->apache-beam[gcp]) (8.4.0)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.48b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-sdk>=1.27.0->google-cloud-pubsub<3,>=2.1.0->apache-beam[gcp]) (0.48b0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3->google-cloud-aiplatform<2.0,>=1.26.0->apache-beam[gcp]) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic<3->google-cloud-aiplatform<2.0,>=1.26.0->apache-beam[gcp]) (2.23.4)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.10/dist-packages (from deprecated>=1.2.6->opentelemetry-api>=1.27.0->google-cloud-pubsub<3,>=2.1.0->apache-beam[gcp]) (1.16.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<=8.4.0,>=6.0->opentelemetry-api>=1.27.0->google-cloud-pubsub<3,>=2.1.0->apache-beam[gcp]) (3.20.2)\n", + "Downloading google_cloud_dlp-3.23.0-py2.py3-none-any.whl (193 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m193.8/193.8 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_cloud_pubsublite-1.11.1-py2.py3-none-any.whl (304 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m304.6/304.6 kB\u001b[0m \u001b[31m22.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_cloud_recommendations_ai-0.10.12-py2.py3-none-any.whl (184 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m184.7/184.7 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_cloud_spanner-3.49.1-py2.py3-none-any.whl (402 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m402.7/402.7 kB\u001b[0m \u001b[31m29.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_cloud_videointelligence-2.13.5-py2.py3-none-any.whl (244 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m245.0/245.0 kB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_cloud_vision-3.7.4-py2.py3-none-any.whl (467 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m467.5/467.5 kB\u001b[0m \u001b[31m37.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl (294 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.6/294.6 kB\u001b[0m \u001b[31m21.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading grpc_interceptor-0.15.4-py3-none-any.whl (20 kB)\n", + "Downloading overrides-7.7.0-py3-none-any.whl (17 kB)\n", + "Building wheels for collected packages: google-apitools\n", + " Building wheel for google-apitools (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for google-apitools: filename=google_apitools-0.5.31-py3-none-any.whl size=131014 sha256=3b23230511e396ebd4aa2974cdf1dacf21f587956bf6a4c67d26d07388adb463\n", + " Stored in directory: /root/.cache/pip/wheels/04/b7/e0/9712f8c23a5da3d9d16fb88216b897bf60e85b12f5470f26ee\n", + "Successfully built google-apitools\n", + "Installing collected packages: protobuf, overrides, grpc-interceptor, google-apitools, google-cloud-vision, google-cloud-videointelligence, google-cloud-spanner, google-cloud-recommendations-ai, google-cloud-dlp, google-cloud-pubsublite\n", + " Attempting uninstall: protobuf\n", + " Found existing installation: protobuf 5.28.2\n", + " Uninstalling protobuf-5.28.2:\n", + " Successfully uninstalled protobuf-5.28.2\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "tensorflow-metadata 1.15.0 requires protobuf<4.21,>=3.20.3; python_version < \"3.11\", but you have protobuf 4.25.5 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed google-apitools-0.5.31 google-cloud-dlp-3.23.0 google-cloud-pubsublite-1.11.1 google-cloud-recommendations-ai-0.10.12 google-cloud-spanner-3.49.1 google-cloud-videointelligence-2.13.5 google-cloud-vision-3.7.4 grpc-interceptor-0.15.4 overrides-7.7.0 protobuf-4.25.5\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "google" + ] + }, + "id": "9480526003814e7ea17486e220ad624f" + } + }, + "metadata": {} + } + ], + "source": [ + "!pip install apache-beam\n", + "!pip install --force-reinstall google-cloud-storage\n", + "!pip install apache-beam[gcp]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YEGPJBcgECyp" + }, + "source": [ + "## Importing packages\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "uC24Ht9pVJ2-" + }, + "outputs": [], + "source": [ + "# authenticating user\n", + "from google.colab import auth\n", + "auth.authenticate_user()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "tlczfgZ-PicY" + }, + "outputs": [], + "source": [ + "from apache_beam.options.pipeline_options import PipelineOptions\n", + "import apache_beam as beam\n", + "from apache_beam.io import ReadFromText, WriteToBigQuery\n", + "from google.cloud import bigquery" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jWWwjGIYEmI9" + }, + "source": [ + "## BigQuery table_schema initialization" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "qUeuW5vsjio4" + }, + "outputs": [], + "source": [ + "\n", + "table_schema = {\n", + " 'fields': [\n", + " {\n", + " \"name\": \"student_id\",\n", + " \"mode\": \"REQUIRED\",\n", + " \"type\": \"STRING\",\n", + " \"description\": \"\",\n", + " \"fields\": []\n", + " },\n", + " {\n", + " \"name\": \"study_hours_per_week\",\n", + " \"mode\": \"NULLABLE\",\n", + " \"type\": \"FLOAT\",\n", + " \"description\": \"\",\n", + " \"fields\": []\n", + " },\n", + " {\n", + " \"name\": \"attendance_rate\",\n", + " \"mode\": \"NULLABLE\",\n", + " \"type\": \"FLOAT\",\n", + " \"description\": \"\",\n", + " \"fields\": []\n", + " },\n", + " {\n", + " \"name\": \"previous_grades\",\n", + " \"mode\": \"NULLABLE\",\n", + " \"type\": \"FLOAT\",\n", + " \"description\": \"\",\n", + " \"fields\": []\n", + " },\n", + " {\n", + " \"name\": \"participation_in_extracurricular_activities\",\n", + " \"mode\": \"NULLABLE\",\n", + " \"type\": \"BOOLEAN\",\n", + " \"description\": \"\",\n", + " \"fields\": []\n", + " },\n", + " {\n", + " \"name\": \"parent_education_level\",\n", + " \"mode\": \"NULLABLE\",\n", + " \"type\": \"STRING\",\n", + " \"description\": \"\",\n", + " \"fields\": []\n", + " },\n", + " {\n", + " \"name\": \"passed\",\n", + " \"mode\": \"NULLABLE\",\n", + " \"type\": \"BOOLEAN\",\n", + " \"description\": \"\",\n", + " \"fields\": []\n", + " }\n", + "]\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ml7eqZQLS_bB" + }, + "source": [ + "## Pipeline Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "EeWe2_IJnvB7" + }, + "outputs": [], + "source": [ + "def parse_csv_to_dict(line):\n", + " element_list = line.split(',')\n", + " new_row = {}\n", + " for i, field_data in enumerate(table_schema['fields']):\n", + " new_row[field_data[\"name\"]] = element_list[i]\n", + " return [new_row]\n", + "\n", + "\n", + "def run_pipeline(beam_options, input_file, project_id, dataset_id, table_name):\n", + "\n", + " with beam.Pipeline(options=beam_options) as pipeline:\n", + "\n", + " table_spec = f'{project_id}:{dataset_id}.{table_name}'\n", + "\n", + " # Read CSV data from the GCS\n", + " data = pipeline | 'ReadFromText' >> ReadFromText(input_file, skip_header_lines=1) \\\n", + " | 'Parse CSV' >> beam.ParDo(parse_csv_to_dict)\n", + " data = data | 'Remove nan' >> beam.Filter(lambda row: all(value is not None and value != 'nan' for value in row.values()))\n", + " # data | \"Print\" >> beam.Map(print)\n", + "\n", + " # Write the data to BigQuery\n", + " data | 'WriteToBigQuery' >> WriteToBigQuery(\n", + " table=table_spec,\n", + " schema=table_schema,\n", + " create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,\n", + " write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND\n", + " )\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "-KtrCnqm4Orr", + "jupyter": { + "outputs_hidden": true + }, + "outputId": "f6f6691f-9dad-4df9-9bdf-50d67a4b4181" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n", + "WARNING:apache_beam.options.pipeline_options:Discarding unparseable args: ['-f', '/root/.local/share/jupyter/runtime/kernel-cf9f0cd3-b77c-49a0-83d9-a43dbc665ab2.json']\n" + ] + } + ], + "source": [ + "\n", + "beam_options = PipelineOptions(\n", + " runner='DirectRunner',\n", + " temp_location='gs://niveustraining-bucketname/Durvesh',\n", + "\n", + ")\n", + "\n", + "input_file = 'gs://niveustraining-bucketname/Durvesh/student_performance_prediction.csv'\n", + "project_id = 'niveustraining'\n", + "dataset_id = 'python_DE_assignment'\n", + "table_name = 'student_performance'\n", + "\n", + "run_pipeline(beam_options, input_file, project_id, dataset_id, table_name)" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "IUsK19yV6Uhm" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file