[{"data":1,"prerenderedAt":2794},["ShallowReactive",2],{"content-query-U4mBxLSZtq":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"body":10,"_type":2788,"_id":2789,"_source":2790,"_file":2791,"_stem":2792,"_extension":2793},"\u002Fdocs\u002Ffeatures\u002Fbuilding-data-pipelines","features",false,"","Building Data Pipelines","Design and implement production data pipelines",{"type":11,"children":12,"toc":2753},"root",[13,21,27,34,41,126,131,137,143,314,320,501,507,513,650,656,786,792,968,974,1213,1219,1225,1379,1385,1578,1584,1590,1942,1948,1954,2119,2125,2298,2304,2310,2546,2552,2557,2565,2571,2577,2618,2624,2643,2659,2675,2691,2697,2747],{"type":14,"tag":15,"props":16,"children":18},"element","h1",{"id":17},"building-data-pipelines",[19],{"type":20,"value":8},"text",{"type":14,"tag":22,"props":23,"children":24},"p",{},[25],{"type":20,"value":26},"Learn to design and build reliable data pipelines for production use.",{"type":14,"tag":28,"props":29,"children":31},"h2",{"id":30},"pipeline-architecture",[32],{"type":20,"value":33},"Pipeline Architecture",{"type":14,"tag":35,"props":36,"children":38},"h3",{"id":37},"the-three-layers",[39],{"type":20,"value":40},"The Three Layers",{"type":14,"tag":42,"props":43,"children":47},"pre",{"code":44,"language":45,"meta":7,"className":46,"style":7},"graph TD\n    A[\"INGESTION LAYER\u003Cbr\u002F>━━━━━━━━━\u003Cbr\u002F>Extract from sources\u003Cbr\u002F>SQL • APIs • Files • Streams\"] --> B\n    B[\"TRANSFORMATION LAYER\u003Cbr\u002F>━━━━━━━━━\u003Cbr\u002F>Clean • Validate • Enrich • Aggregate\"] --> C\n    C[\"PRESENTATION LAYER\u003Cbr\u002F>━━━━━━━━━\u003Cbr\u002F>Reports • Dashboards • Alerts\"]\n    \n    style A fill:#1f97d4,stroke:#0b5394,stroke-width:2px,color:#fff\n    style B fill:#ff9900,stroke:#ec7211,stroke-width:2px,color:#fff\n    style C fill:#37475a,stroke:#1f1f1f,stroke-width:2px,color:#fff\n","mermaid","language-mermaid shiki shiki-themes github-dark",[48],{"type":14,"tag":49,"props":50,"children":51},"code",{"__ignoreMap":7},[52,63,72,81,90,99,108,117],{"type":14,"tag":53,"props":54,"children":57},"span",{"class":55,"line":56},"line",1,[58],{"type":14,"tag":53,"props":59,"children":60},{},[61],{"type":20,"value":62},"graph TD\n",{"type":14,"tag":53,"props":64,"children":66},{"class":55,"line":65},2,[67],{"type":14,"tag":53,"props":68,"children":69},{},[70],{"type":20,"value":71},"    A[\"INGESTION LAYER\u003Cbr\u002F>━━━━━━━━━\u003Cbr\u002F>Extract from sources\u003Cbr\u002F>SQL • APIs • Files • Streams\"] --> B\n",{"type":14,"tag":53,"props":73,"children":75},{"class":55,"line":74},3,[76],{"type":14,"tag":53,"props":77,"children":78},{},[79],{"type":20,"value":80},"    B[\"TRANSFORMATION LAYER\u003Cbr\u002F>━━━━━━━━━\u003Cbr\u002F>Clean • Validate • Enrich • Aggregate\"] --> C\n",{"type":14,"tag":53,"props":82,"children":84},{"class":55,"line":83},4,[85],{"type":14,"tag":53,"props":86,"children":87},{},[88],{"type":20,"value":89},"    C[\"PRESENTATION LAYER\u003Cbr\u002F>━━━━━━━━━\u003Cbr\u002F>Reports • Dashboards • Alerts\"]\n",{"type":14,"tag":53,"props":91,"children":93},{"class":55,"line":92},5,[94],{"type":14,"tag":53,"props":95,"children":96},{},[97],{"type":20,"value":98},"    \n",{"type":14,"tag":53,"props":100,"children":102},{"class":55,"line":101},6,[103],{"type":14,"tag":53,"props":104,"children":105},{},[106],{"type":20,"value":107},"    style A fill:#1f97d4,stroke:#0b5394,stroke-width:2px,color:#fff\n",{"type":14,"tag":53,"props":109,"children":111},{"class":55,"line":110},7,[112],{"type":14,"tag":53,"props":113,"children":114},{},[115],{"type":20,"value":116},"    style B fill:#ff9900,stroke:#ec7211,stroke-width:2px,color:#fff\n",{"type":14,"tag":53,"props":118,"children":120},{"class":55,"line":119},8,[121],{"type":14,"tag":53,"props":122,"children":123},{},[124],{"type":20,"value":125},"    style C fill:#37475a,stroke:#1f1f1f,stroke-width:2px,color:#fff\n",{"type":14,"tag":22,"props":127,"children":128},{},[129],{"type":20,"value":130},"Each layer has specific jobs and quality standards.",{"type":14,"tag":28,"props":132,"children":134},{"id":133},"layer-1-ingestion",[135],{"type":20,"value":136},"Layer 1: Ingestion",{"type":14,"tag":35,"props":138,"children":140},{"id":139},"extract-data-from-sources",[141],{"type":20,"value":142},"Extract Data from Sources",{"type":14,"tag":42,"props":144,"children":148},{"code":145,"language":146,"meta":7,"className":147,"style":7},"# SQL databases\ndef extract_from_postgres():\n    query = \"SELECT * FROM customers WHERE active = true\"\n    return pd.read_sql(query, postgres_conn)\n\n# APIs\ndef extract_from_api():\n    response = requests.get('https:\u002F\u002Fapi.external.com\u002Fdata')\n    return pd.DataFrame(response.json())\n\n# Files\ndef extract_from_csv():\n    return pd.read_csv('s3:\u002F\u002Fbucket\u002Fdata.csv')\n\n# Messages\ndef extract_from_kafka():\n    # Stream real-time events\n    for message in kafka_consumer:\n        yield json.loads(message.value)\n","python","language-python shiki shiki-themes github-dark",[149],{"type":14,"tag":49,"props":150,"children":151},{"__ignoreMap":7},[152,160,168,176,184,193,201,209,217,226,234,243,252,261,269,278,287,296,305],{"type":14,"tag":53,"props":153,"children":154},{"class":55,"line":56},[155],{"type":14,"tag":53,"props":156,"children":157},{},[158],{"type":20,"value":159},"# SQL databases\n",{"type":14,"tag":53,"props":161,"children":162},{"class":55,"line":65},[163],{"type":14,"tag":53,"props":164,"children":165},{},[166],{"type":20,"value":167},"def extract_from_postgres():\n",{"type":14,"tag":53,"props":169,"children":170},{"class":55,"line":74},[171],{"type":14,"tag":53,"props":172,"children":173},{},[174],{"type":20,"value":175},"    query = \"SELECT * FROM customers WHERE active = true\"\n",{"type":14,"tag":53,"props":177,"children":178},{"class":55,"line":83},[179],{"type":14,"tag":53,"props":180,"children":181},{},[182],{"type":20,"value":183},"    return pd.read_sql(query, postgres_conn)\n",{"type":14,"tag":53,"props":185,"children":186},{"class":55,"line":92},[187],{"type":14,"tag":53,"props":188,"children":190},{"emptyLinePlaceholder":189},true,[191],{"type":20,"value":192},"\n",{"type":14,"tag":53,"props":194,"children":195},{"class":55,"line":101},[196],{"type":14,"tag":53,"props":197,"children":198},{},[199],{"type":20,"value":200},"# APIs\n",{"type":14,"tag":53,"props":202,"children":203},{"class":55,"line":110},[204],{"type":14,"tag":53,"props":205,"children":206},{},[207],{"type":20,"value":208},"def extract_from_api():\n",{"type":14,"tag":53,"props":210,"children":211},{"class":55,"line":119},[212],{"type":14,"tag":53,"props":213,"children":214},{},[215],{"type":20,"value":216},"    response = requests.get('https:\u002F\u002Fapi.external.com\u002Fdata')\n",{"type":14,"tag":53,"props":218,"children":220},{"class":55,"line":219},9,[221],{"type":14,"tag":53,"props":222,"children":223},{},[224],{"type":20,"value":225},"    return pd.DataFrame(response.json())\n",{"type":14,"tag":53,"props":227,"children":229},{"class":55,"line":228},10,[230],{"type":14,"tag":53,"props":231,"children":232},{"emptyLinePlaceholder":189},[233],{"type":20,"value":192},{"type":14,"tag":53,"props":235,"children":237},{"class":55,"line":236},11,[238],{"type":14,"tag":53,"props":239,"children":240},{},[241],{"type":20,"value":242},"# Files\n",{"type":14,"tag":53,"props":244,"children":246},{"class":55,"line":245},12,[247],{"type":14,"tag":53,"props":248,"children":249},{},[250],{"type":20,"value":251},"def extract_from_csv():\n",{"type":14,"tag":53,"props":253,"children":255},{"class":55,"line":254},13,[256],{"type":14,"tag":53,"props":257,"children":258},{},[259],{"type":20,"value":260},"    return pd.read_csv('s3:\u002F\u002Fbucket\u002Fdata.csv')\n",{"type":14,"tag":53,"props":262,"children":264},{"class":55,"line":263},14,[265],{"type":14,"tag":53,"props":266,"children":267},{"emptyLinePlaceholder":189},[268],{"type":20,"value":192},{"type":14,"tag":53,"props":270,"children":272},{"class":55,"line":271},15,[273],{"type":14,"tag":53,"props":274,"children":275},{},[276],{"type":20,"value":277},"# Messages\n",{"type":14,"tag":53,"props":279,"children":281},{"class":55,"line":280},16,[282],{"type":14,"tag":53,"props":283,"children":284},{},[285],{"type":20,"value":286},"def extract_from_kafka():\n",{"type":14,"tag":53,"props":288,"children":290},{"class":55,"line":289},17,[291],{"type":14,"tag":53,"props":292,"children":293},{},[294],{"type":20,"value":295},"    # Stream real-time events\n",{"type":14,"tag":53,"props":297,"children":299},{"class":55,"line":298},18,[300],{"type":14,"tag":53,"props":301,"children":302},{},[303],{"type":20,"value":304},"    for message in kafka_consumer:\n",{"type":14,"tag":53,"props":306,"children":308},{"class":55,"line":307},19,[309],{"type":14,"tag":53,"props":310,"children":311},{},[312],{"type":20,"value":313},"        yield json.loads(message.value)\n",{"type":14,"tag":35,"props":315,"children":317},{"id":316},"ingestion-quality-checks",[318],{"type":20,"value":319},"Ingestion Quality Checks",{"type":14,"tag":42,"props":321,"children":323},{"code":322,"language":146,"meta":7,"className":147,"style":7},"def validate_ingestion(data):\n    \"\"\"Ensure data was extracted correctly\"\"\"\n    \n    checks = {\n        'not_empty': len(data) > 0,\n        'all_columns': all(col in data.columns for col in EXPECTED_COLS),\n        'no_duplicates': data.duplicated().sum() == 0,\n        'recent_data': data['created_at'].max() > pd.Timestamp.now() - pd.Timedelta(days=1)\n    }\n    \n    failed = [k for k, v in checks.items() if not v]\n    \n    if failed:\n        raise ValueError(f\"Ingestion validation failed: {failed}\")\n    \n    return True\n\n# Use in pipeline\ndef ingest_step():\n    data = extract_from_source()\n    validate_ingestion(data)\n    return data\n",[324],{"type":14,"tag":49,"props":325,"children":326},{"__ignoreMap":7},[327,335,343,350,358,366,374,382,390,398,405,413,420,428,436,443,451,458,466,474,483,492],{"type":14,"tag":53,"props":328,"children":329},{"class":55,"line":56},[330],{"type":14,"tag":53,"props":331,"children":332},{},[333],{"type":20,"value":334},"def validate_ingestion(data):\n",{"type":14,"tag":53,"props":336,"children":337},{"class":55,"line":65},[338],{"type":14,"tag":53,"props":339,"children":340},{},[341],{"type":20,"value":342},"    \"\"\"Ensure data was extracted correctly\"\"\"\n",{"type":14,"tag":53,"props":344,"children":345},{"class":55,"line":74},[346],{"type":14,"tag":53,"props":347,"children":348},{},[349],{"type":20,"value":98},{"type":14,"tag":53,"props":351,"children":352},{"class":55,"line":83},[353],{"type":14,"tag":53,"props":354,"children":355},{},[356],{"type":20,"value":357},"    checks = {\n",{"type":14,"tag":53,"props":359,"children":360},{"class":55,"line":92},[361],{"type":14,"tag":53,"props":362,"children":363},{},[364],{"type":20,"value":365},"        'not_empty': len(data) > 0,\n",{"type":14,"tag":53,"props":367,"children":368},{"class":55,"line":101},[369],{"type":14,"tag":53,"props":370,"children":371},{},[372],{"type":20,"value":373},"        'all_columns': all(col in data.columns for col in EXPECTED_COLS),\n",{"type":14,"tag":53,"props":375,"children":376},{"class":55,"line":110},[377],{"type":14,"tag":53,"props":378,"children":379},{},[380],{"type":20,"value":381},"        'no_duplicates': data.duplicated().sum() == 0,\n",{"type":14,"tag":53,"props":383,"children":384},{"class":55,"line":119},[385],{"type":14,"tag":53,"props":386,"children":387},{},[388],{"type":20,"value":389},"        'recent_data': data['created_at'].max() > pd.Timestamp.now() - pd.Timedelta(days=1)\n",{"type":14,"tag":53,"props":391,"children":392},{"class":55,"line":219},[393],{"type":14,"tag":53,"props":394,"children":395},{},[396],{"type":20,"value":397},"    }\n",{"type":14,"tag":53,"props":399,"children":400},{"class":55,"line":228},[401],{"type":14,"tag":53,"props":402,"children":403},{},[404],{"type":20,"value":98},{"type":14,"tag":53,"props":406,"children":407},{"class":55,"line":236},[408],{"type":14,"tag":53,"props":409,"children":410},{},[411],{"type":20,"value":412},"    failed = [k for k, v in checks.items() if not v]\n",{"type":14,"tag":53,"props":414,"children":415},{"class":55,"line":245},[416],{"type":14,"tag":53,"props":417,"children":418},{},[419],{"type":20,"value":98},{"type":14,"tag":53,"props":421,"children":422},{"class":55,"line":254},[423],{"type":14,"tag":53,"props":424,"children":425},{},[426],{"type":20,"value":427},"    if failed:\n",{"type":14,"tag":53,"props":429,"children":430},{"class":55,"line":263},[431],{"type":14,"tag":53,"props":432,"children":433},{},[434],{"type":20,"value":435},"        raise ValueError(f\"Ingestion validation failed: {failed}\")\n",{"type":14,"tag":53,"props":437,"children":438},{"class":55,"line":271},[439],{"type":14,"tag":53,"props":440,"children":441},{},[442],{"type":20,"value":98},{"type":14,"tag":53,"props":444,"children":445},{"class":55,"line":280},[446],{"type":14,"tag":53,"props":447,"children":448},{},[449],{"type":20,"value":450},"    return True\n",{"type":14,"tag":53,"props":452,"children":453},{"class":55,"line":289},[454],{"type":14,"tag":53,"props":455,"children":456},{"emptyLinePlaceholder":189},[457],{"type":20,"value":192},{"type":14,"tag":53,"props":459,"children":460},{"class":55,"line":298},[461],{"type":14,"tag":53,"props":462,"children":463},{},[464],{"type":20,"value":465},"# Use in pipeline\n",{"type":14,"tag":53,"props":467,"children":468},{"class":55,"line":307},[469],{"type":14,"tag":53,"props":470,"children":471},{},[472],{"type":20,"value":473},"def ingest_step():\n",{"type":14,"tag":53,"props":475,"children":477},{"class":55,"line":476},20,[478],{"type":14,"tag":53,"props":479,"children":480},{},[481],{"type":20,"value":482},"    data = extract_from_source()\n",{"type":14,"tag":53,"props":484,"children":486},{"class":55,"line":485},21,[487],{"type":14,"tag":53,"props":488,"children":489},{},[490],{"type":20,"value":491},"    validate_ingestion(data)\n",{"type":14,"tag":53,"props":493,"children":495},{"class":55,"line":494},22,[496],{"type":14,"tag":53,"props":497,"children":498},{},[499],{"type":20,"value":500},"    return data\n",{"type":14,"tag":28,"props":502,"children":504},{"id":503},"layer-2-transformation",[505],{"type":20,"value":506},"Layer 2: Transformation",{"type":14,"tag":35,"props":508,"children":510},{"id":509},"clean-data",[511],{"type":20,"value":512},"Clean Data",{"type":14,"tag":42,"props":514,"children":516},{"code":515,"language":146,"meta":7,"className":147,"style":7},"def clean_data(data):\n    \"\"\"Remove bad data\"\"\"\n    \n    # Remove duplicates\n    data = data.drop_duplicates()\n    \n    # Remove rows with nulls in critical columns\n    data = data.dropna(subset=['customer_id', 'email'])\n    \n    # Fix data types\n    data['revenue'] = pd.to_numeric(data['revenue'], errors='coerce')\n    data['created_at'] = pd.to_datetime(data['created_at'])\n    \n    # Remove outliers\n    data = data[data['revenue'] \u003C= 1000000]  # Unrealistic values\n    \n    return data\n",[517],{"type":14,"tag":49,"props":518,"children":519},{"__ignoreMap":7},[520,528,536,543,551,559,566,574,582,589,597,605,613,620,628,636,643],{"type":14,"tag":53,"props":521,"children":522},{"class":55,"line":56},[523],{"type":14,"tag":53,"props":524,"children":525},{},[526],{"type":20,"value":527},"def clean_data(data):\n",{"type":14,"tag":53,"props":529,"children":530},{"class":55,"line":65},[531],{"type":14,"tag":53,"props":532,"children":533},{},[534],{"type":20,"value":535},"    \"\"\"Remove bad data\"\"\"\n",{"type":14,"tag":53,"props":537,"children":538},{"class":55,"line":74},[539],{"type":14,"tag":53,"props":540,"children":541},{},[542],{"type":20,"value":98},{"type":14,"tag":53,"props":544,"children":545},{"class":55,"line":83},[546],{"type":14,"tag":53,"props":547,"children":548},{},[549],{"type":20,"value":550},"    # Remove duplicates\n",{"type":14,"tag":53,"props":552,"children":553},{"class":55,"line":92},[554],{"type":14,"tag":53,"props":555,"children":556},{},[557],{"type":20,"value":558},"    data = data.drop_duplicates()\n",{"type":14,"tag":53,"props":560,"children":561},{"class":55,"line":101},[562],{"type":14,"tag":53,"props":563,"children":564},{},[565],{"type":20,"value":98},{"type":14,"tag":53,"props":567,"children":568},{"class":55,"line":110},[569],{"type":14,"tag":53,"props":570,"children":571},{},[572],{"type":20,"value":573},"    # Remove rows with nulls in critical columns\n",{"type":14,"tag":53,"props":575,"children":576},{"class":55,"line":119},[577],{"type":14,"tag":53,"props":578,"children":579},{},[580],{"type":20,"value":581},"    data = data.dropna(subset=['customer_id', 'email'])\n",{"type":14,"tag":53,"props":583,"children":584},{"class":55,"line":219},[585],{"type":14,"tag":53,"props":586,"children":587},{},[588],{"type":20,"value":98},{"type":14,"tag":53,"props":590,"children":591},{"class":55,"line":228},[592],{"type":14,"tag":53,"props":593,"children":594},{},[595],{"type":20,"value":596},"    # Fix data types\n",{"type":14,"tag":53,"props":598,"children":599},{"class":55,"line":236},[600],{"type":14,"tag":53,"props":601,"children":602},{},[603],{"type":20,"value":604},"    data['revenue'] = pd.to_numeric(data['revenue'], errors='coerce')\n",{"type":14,"tag":53,"props":606,"children":607},{"class":55,"line":245},[608],{"type":14,"tag":53,"props":609,"children":610},{},[611],{"type":20,"value":612},"    data['created_at'] = pd.to_datetime(data['created_at'])\n",{"type":14,"tag":53,"props":614,"children":615},{"class":55,"line":254},[616],{"type":14,"tag":53,"props":617,"children":618},{},[619],{"type":20,"value":98},{"type":14,"tag":53,"props":621,"children":622},{"class":55,"line":263},[623],{"type":14,"tag":53,"props":624,"children":625},{},[626],{"type":20,"value":627},"    # Remove outliers\n",{"type":14,"tag":53,"props":629,"children":630},{"class":55,"line":271},[631],{"type":14,"tag":53,"props":632,"children":633},{},[634],{"type":20,"value":635},"    data = data[data['revenue'] \u003C= 1000000]  # Unrealistic values\n",{"type":14,"tag":53,"props":637,"children":638},{"class":55,"line":280},[639],{"type":14,"tag":53,"props":640,"children":641},{},[642],{"type":20,"value":98},{"type":14,"tag":53,"props":644,"children":645},{"class":55,"line":289},[646],{"type":14,"tag":53,"props":647,"children":648},{},[649],{"type":20,"value":500},{"type":14,"tag":35,"props":651,"children":653},{"id":652},"validate-data-quality",[654],{"type":20,"value":655},"Validate Data Quality",{"type":14,"tag":42,"props":657,"children":659},{"code":658,"language":146,"meta":7,"className":147,"style":7},"def validate_data_quality(data):\n    \"\"\"Check transformed data meets standards\"\"\"\n    \n    rules = {\n        'completeness': data.isnull().sum() \u002F len(data) \u003C 0.05,  # \u003C5% nulls\n        'revenue_positive': (data['revenue'] > 0).sum() \u002F len(data) > 0.95,  # >95% positive\n        'date_order': data['created_at'] \u003C= data['last_purchase'],  # Logical order\n        'unique_ids': data.duplicated(subset=['customer_id']).sum() == 0,  # No duplicates\n    }\n    \n    results = {k: v.all() if hasattr(v, 'all') else v for k, v in rules.items()}\n    \n    if not all(results.values()):\n        raise ValueError(f\"Quality validation failed: {results}\")\n    \n    return results\n",[660],{"type":14,"tag":49,"props":661,"children":662},{"__ignoreMap":7},[663,671,679,686,694,702,710,718,726,733,740,748,755,763,771,778],{"type":14,"tag":53,"props":664,"children":665},{"class":55,"line":56},[666],{"type":14,"tag":53,"props":667,"children":668},{},[669],{"type":20,"value":670},"def validate_data_quality(data):\n",{"type":14,"tag":53,"props":672,"children":673},{"class":55,"line":65},[674],{"type":14,"tag":53,"props":675,"children":676},{},[677],{"type":20,"value":678},"    \"\"\"Check transformed data meets standards\"\"\"\n",{"type":14,"tag":53,"props":680,"children":681},{"class":55,"line":74},[682],{"type":14,"tag":53,"props":683,"children":684},{},[685],{"type":20,"value":98},{"type":14,"tag":53,"props":687,"children":688},{"class":55,"line":83},[689],{"type":14,"tag":53,"props":690,"children":691},{},[692],{"type":20,"value":693},"    rules = {\n",{"type":14,"tag":53,"props":695,"children":696},{"class":55,"line":92},[697],{"type":14,"tag":53,"props":698,"children":699},{},[700],{"type":20,"value":701},"        'completeness': data.isnull().sum() \u002F len(data) \u003C 0.05,  # \u003C5% nulls\n",{"type":14,"tag":53,"props":703,"children":704},{"class":55,"line":101},[705],{"type":14,"tag":53,"props":706,"children":707},{},[708],{"type":20,"value":709},"        'revenue_positive': (data['revenue'] > 0).sum() \u002F len(data) > 0.95,  # >95% positive\n",{"type":14,"tag":53,"props":711,"children":712},{"class":55,"line":110},[713],{"type":14,"tag":53,"props":714,"children":715},{},[716],{"type":20,"value":717},"        'date_order': data['created_at'] \u003C= data['last_purchase'],  # Logical order\n",{"type":14,"tag":53,"props":719,"children":720},{"class":55,"line":119},[721],{"type":14,"tag":53,"props":722,"children":723},{},[724],{"type":20,"value":725},"        'unique_ids': data.duplicated(subset=['customer_id']).sum() == 0,  # No duplicates\n",{"type":14,"tag":53,"props":727,"children":728},{"class":55,"line":219},[729],{"type":14,"tag":53,"props":730,"children":731},{},[732],{"type":20,"value":397},{"type":14,"tag":53,"props":734,"children":735},{"class":55,"line":228},[736],{"type":14,"tag":53,"props":737,"children":738},{},[739],{"type":20,"value":98},{"type":14,"tag":53,"props":741,"children":742},{"class":55,"line":236},[743],{"type":14,"tag":53,"props":744,"children":745},{},[746],{"type":20,"value":747},"    results = {k: v.all() if hasattr(v, 'all') else v for k, v in rules.items()}\n",{"type":14,"tag":53,"props":749,"children":750},{"class":55,"line":245},[751],{"type":14,"tag":53,"props":752,"children":753},{},[754],{"type":20,"value":98},{"type":14,"tag":53,"props":756,"children":757},{"class":55,"line":254},[758],{"type":14,"tag":53,"props":759,"children":760},{},[761],{"type":20,"value":762},"    if not all(results.values()):\n",{"type":14,"tag":53,"props":764,"children":765},{"class":55,"line":263},[766],{"type":14,"tag":53,"props":767,"children":768},{},[769],{"type":20,"value":770},"        raise ValueError(f\"Quality validation failed: {results}\")\n",{"type":14,"tag":53,"props":772,"children":773},{"class":55,"line":271},[774],{"type":14,"tag":53,"props":775,"children":776},{},[777],{"type":20,"value":98},{"type":14,"tag":53,"props":779,"children":780},{"class":55,"line":280},[781],{"type":14,"tag":53,"props":782,"children":783},{},[784],{"type":20,"value":785},"    return results\n",{"type":14,"tag":35,"props":787,"children":789},{"id":788},"enrich-data",[790],{"type":20,"value":791},"Enrich Data",{"type":14,"tag":42,"props":793,"children":795},{"code":794,"language":146,"meta":7,"className":147,"style":7},"def enrich_data(data):\n    \"\"\"Add value to data\"\"\"\n    \n    # Join with other data\n    data = data.merge(\n        get_customer_segments(),\n        on='customer_id',\n        how='left'\n    )\n    \n    # Calculate derived fields\n    data['lifetime_value'] = (\n        data['revenue'] * data['months_active']\n    )\n    \n    # Add lookups\n    data['region_name'] = data['region_code'].map(REGION_LOOKUP)\n    \n    # Add timestamps\n    data['processed_at'] = pd.Timestamp.now()\n    \n    return data\n",[796],{"type":14,"tag":49,"props":797,"children":798},{"__ignoreMap":7},[799,807,815,822,830,838,846,854,862,870,877,885,893,901,908,915,923,931,938,946,954,961],{"type":14,"tag":53,"props":800,"children":801},{"class":55,"line":56},[802],{"type":14,"tag":53,"props":803,"children":804},{},[805],{"type":20,"value":806},"def enrich_data(data):\n",{"type":14,"tag":53,"props":808,"children":809},{"class":55,"line":65},[810],{"type":14,"tag":53,"props":811,"children":812},{},[813],{"type":20,"value":814},"    \"\"\"Add value to data\"\"\"\n",{"type":14,"tag":53,"props":816,"children":817},{"class":55,"line":74},[818],{"type":14,"tag":53,"props":819,"children":820},{},[821],{"type":20,"value":98},{"type":14,"tag":53,"props":823,"children":824},{"class":55,"line":83},[825],{"type":14,"tag":53,"props":826,"children":827},{},[828],{"type":20,"value":829},"    # Join with other data\n",{"type":14,"tag":53,"props":831,"children":832},{"class":55,"line":92},[833],{"type":14,"tag":53,"props":834,"children":835},{},[836],{"type":20,"value":837},"    data = data.merge(\n",{"type":14,"tag":53,"props":839,"children":840},{"class":55,"line":101},[841],{"type":14,"tag":53,"props":842,"children":843},{},[844],{"type":20,"value":845},"        get_customer_segments(),\n",{"type":14,"tag":53,"props":847,"children":848},{"class":55,"line":110},[849],{"type":14,"tag":53,"props":850,"children":851},{},[852],{"type":20,"value":853},"        on='customer_id',\n",{"type":14,"tag":53,"props":855,"children":856},{"class":55,"line":119},[857],{"type":14,"tag":53,"props":858,"children":859},{},[860],{"type":20,"value":861},"        how='left'\n",{"type":14,"tag":53,"props":863,"children":864},{"class":55,"line":219},[865],{"type":14,"tag":53,"props":866,"children":867},{},[868],{"type":20,"value":869},"    )\n",{"type":14,"tag":53,"props":871,"children":872},{"class":55,"line":228},[873],{"type":14,"tag":53,"props":874,"children":875},{},[876],{"type":20,"value":98},{"type":14,"tag":53,"props":878,"children":879},{"class":55,"line":236},[880],{"type":14,"tag":53,"props":881,"children":882},{},[883],{"type":20,"value":884},"    # Calculate derived fields\n",{"type":14,"tag":53,"props":886,"children":887},{"class":55,"line":245},[888],{"type":14,"tag":53,"props":889,"children":890},{},[891],{"type":20,"value":892},"    data['lifetime_value'] = (\n",{"type":14,"tag":53,"props":894,"children":895},{"class":55,"line":254},[896],{"type":14,"tag":53,"props":897,"children":898},{},[899],{"type":20,"value":900},"        data['revenue'] * data['months_active']\n",{"type":14,"tag":53,"props":902,"children":903},{"class":55,"line":263},[904],{"type":14,"tag":53,"props":905,"children":906},{},[907],{"type":20,"value":869},{"type":14,"tag":53,"props":909,"children":910},{"class":55,"line":271},[911],{"type":14,"tag":53,"props":912,"children":913},{},[914],{"type":20,"value":98},{"type":14,"tag":53,"props":916,"children":917},{"class":55,"line":280},[918],{"type":14,"tag":53,"props":919,"children":920},{},[921],{"type":20,"value":922},"    # Add lookups\n",{"type":14,"tag":53,"props":924,"children":925},{"class":55,"line":289},[926],{"type":14,"tag":53,"props":927,"children":928},{},[929],{"type":20,"value":930},"    data['region_name'] = data['region_code'].map(REGION_LOOKUP)\n",{"type":14,"tag":53,"props":932,"children":933},{"class":55,"line":298},[934],{"type":14,"tag":53,"props":935,"children":936},{},[937],{"type":20,"value":98},{"type":14,"tag":53,"props":939,"children":940},{"class":55,"line":307},[941],{"type":14,"tag":53,"props":942,"children":943},{},[944],{"type":20,"value":945},"    # Add timestamps\n",{"type":14,"tag":53,"props":947,"children":948},{"class":55,"line":476},[949],{"type":14,"tag":53,"props":950,"children":951},{},[952],{"type":20,"value":953},"    data['processed_at'] = pd.Timestamp.now()\n",{"type":14,"tag":53,"props":955,"children":956},{"class":55,"line":485},[957],{"type":14,"tag":53,"props":958,"children":959},{},[960],{"type":20,"value":98},{"type":14,"tag":53,"props":962,"children":963},{"class":55,"line":494},[964],{"type":14,"tag":53,"props":965,"children":966},{},[967],{"type":20,"value":500},{"type":14,"tag":35,"props":969,"children":971},{"id":970},"aggregate-data",[972],{"type":20,"value":973},"Aggregate Data",{"type":14,"tag":42,"props":975,"children":977},{"code":976,"language":146,"meta":7,"className":147,"style":7},"def aggregate_data(data):\n    \"\"\"Summarize to appropriate level\"\"\"\n    \n    # Daily summary\n    daily = data.groupby('date').agg({\n        'revenue': ['sum', 'mean'],\n        'orders': 'count',\n        'customers': 'nunique'\n    }).reset_index()\n    \n    # By region\n    by_region = data.groupby('region').agg({\n        'revenue': 'sum',\n        'avg_order_value': 'mean'\n    }).reset_index()\n    \n    # Pivot for analysis\n    pivot = data.pivot_table(\n        values='revenue',\n        index='date',\n        columns='region',\n        aggfunc='sum'\n    )\n    \n    return {\n        'daily': daily,\n        'by_region': by_region,\n        'pivot': pivot\n    }\n",[978],{"type":14,"tag":49,"props":979,"children":980},{"__ignoreMap":7},[981,989,997,1004,1012,1020,1028,1036,1044,1052,1059,1067,1075,1083,1091,1098,1105,1113,1121,1129,1137,1145,1153,1161,1169,1178,1187,1196,1205],{"type":14,"tag":53,"props":982,"children":983},{"class":55,"line":56},[984],{"type":14,"tag":53,"props":985,"children":986},{},[987],{"type":20,"value":988},"def aggregate_data(data):\n",{"type":14,"tag":53,"props":990,"children":991},{"class":55,"line":65},[992],{"type":14,"tag":53,"props":993,"children":994},{},[995],{"type":20,"value":996},"    \"\"\"Summarize to appropriate level\"\"\"\n",{"type":14,"tag":53,"props":998,"children":999},{"class":55,"line":74},[1000],{"type":14,"tag":53,"props":1001,"children":1002},{},[1003],{"type":20,"value":98},{"type":14,"tag":53,"props":1005,"children":1006},{"class":55,"line":83},[1007],{"type":14,"tag":53,"props":1008,"children":1009},{},[1010],{"type":20,"value":1011},"    # Daily summary\n",{"type":14,"tag":53,"props":1013,"children":1014},{"class":55,"line":92},[1015],{"type":14,"tag":53,"props":1016,"children":1017},{},[1018],{"type":20,"value":1019},"    daily = data.groupby('date').agg({\n",{"type":14,"tag":53,"props":1021,"children":1022},{"class":55,"line":101},[1023],{"type":14,"tag":53,"props":1024,"children":1025},{},[1026],{"type":20,"value":1027},"        'revenue': ['sum', 'mean'],\n",{"type":14,"tag":53,"props":1029,"children":1030},{"class":55,"line":110},[1031],{"type":14,"tag":53,"props":1032,"children":1033},{},[1034],{"type":20,"value":1035},"        'orders': 'count',\n",{"type":14,"tag":53,"props":1037,"children":1038},{"class":55,"line":119},[1039],{"type":14,"tag":53,"props":1040,"children":1041},{},[1042],{"type":20,"value":1043},"        'customers': 'nunique'\n",{"type":14,"tag":53,"props":1045,"children":1046},{"class":55,"line":219},[1047],{"type":14,"tag":53,"props":1048,"children":1049},{},[1050],{"type":20,"value":1051},"    }).reset_index()\n",{"type":14,"tag":53,"props":1053,"children":1054},{"class":55,"line":228},[1055],{"type":14,"tag":53,"props":1056,"children":1057},{},[1058],{"type":20,"value":98},{"type":14,"tag":53,"props":1060,"children":1061},{"class":55,"line":236},[1062],{"type":14,"tag":53,"props":1063,"children":1064},{},[1065],{"type":20,"value":1066},"    # By region\n",{"type":14,"tag":53,"props":1068,"children":1069},{"class":55,"line":245},[1070],{"type":14,"tag":53,"props":1071,"children":1072},{},[1073],{"type":20,"value":1074},"    by_region = data.groupby('region').agg({\n",{"type":14,"tag":53,"props":1076,"children":1077},{"class":55,"line":254},[1078],{"type":14,"tag":53,"props":1079,"children":1080},{},[1081],{"type":20,"value":1082},"        'revenue': 'sum',\n",{"type":14,"tag":53,"props":1084,"children":1085},{"class":55,"line":263},[1086],{"type":14,"tag":53,"props":1087,"children":1088},{},[1089],{"type":20,"value":1090},"        'avg_order_value': 'mean'\n",{"type":14,"tag":53,"props":1092,"children":1093},{"class":55,"line":271},[1094],{"type":14,"tag":53,"props":1095,"children":1096},{},[1097],{"type":20,"value":1051},{"type":14,"tag":53,"props":1099,"children":1100},{"class":55,"line":280},[1101],{"type":14,"tag":53,"props":1102,"children":1103},{},[1104],{"type":20,"value":98},{"type":14,"tag":53,"props":1106,"children":1107},{"class":55,"line":289},[1108],{"type":14,"tag":53,"props":1109,"children":1110},{},[1111],{"type":20,"value":1112},"    # Pivot for analysis\n",{"type":14,"tag":53,"props":1114,"children":1115},{"class":55,"line":298},[1116],{"type":14,"tag":53,"props":1117,"children":1118},{},[1119],{"type":20,"value":1120},"    pivot = data.pivot_table(\n",{"type":14,"tag":53,"props":1122,"children":1123},{"class":55,"line":307},[1124],{"type":14,"tag":53,"props":1125,"children":1126},{},[1127],{"type":20,"value":1128},"        values='revenue',\n",{"type":14,"tag":53,"props":1130,"children":1131},{"class":55,"line":476},[1132],{"type":14,"tag":53,"props":1133,"children":1134},{},[1135],{"type":20,"value":1136},"        index='date',\n",{"type":14,"tag":53,"props":1138,"children":1139},{"class":55,"line":485},[1140],{"type":14,"tag":53,"props":1141,"children":1142},{},[1143],{"type":20,"value":1144},"        columns='region',\n",{"type":14,"tag":53,"props":1146,"children":1147},{"class":55,"line":494},[1148],{"type":14,"tag":53,"props":1149,"children":1150},{},[1151],{"type":20,"value":1152},"        aggfunc='sum'\n",{"type":14,"tag":53,"props":1154,"children":1156},{"class":55,"line":1155},23,[1157],{"type":14,"tag":53,"props":1158,"children":1159},{},[1160],{"type":20,"value":869},{"type":14,"tag":53,"props":1162,"children":1164},{"class":55,"line":1163},24,[1165],{"type":14,"tag":53,"props":1166,"children":1167},{},[1168],{"type":20,"value":98},{"type":14,"tag":53,"props":1170,"children":1172},{"class":55,"line":1171},25,[1173],{"type":14,"tag":53,"props":1174,"children":1175},{},[1176],{"type":20,"value":1177},"    return {\n",{"type":14,"tag":53,"props":1179,"children":1181},{"class":55,"line":1180},26,[1182],{"type":14,"tag":53,"props":1183,"children":1184},{},[1185],{"type":20,"value":1186},"        'daily': daily,\n",{"type":14,"tag":53,"props":1188,"children":1190},{"class":55,"line":1189},27,[1191],{"type":14,"tag":53,"props":1192,"children":1193},{},[1194],{"type":20,"value":1195},"        'by_region': by_region,\n",{"type":14,"tag":53,"props":1197,"children":1199},{"class":55,"line":1198},28,[1200],{"type":14,"tag":53,"props":1201,"children":1202},{},[1203],{"type":20,"value":1204},"        'pivot': pivot\n",{"type":14,"tag":53,"props":1206,"children":1208},{"class":55,"line":1207},29,[1209],{"type":14,"tag":53,"props":1210,"children":1211},{},[1212],{"type":20,"value":397},{"type":14,"tag":28,"props":1214,"children":1216},{"id":1215},"layer-3-presentation",[1217],{"type":20,"value":1218},"Layer 3: Presentation",{"type":14,"tag":35,"props":1220,"children":1222},{"id":1221},"load-to-destinations",[1223],{"type":20,"value":1224},"Load to Destinations",{"type":14,"tag":42,"props":1226,"children":1228},{"code":1227,"language":146,"meta":7,"className":147,"style":7},"def load_to_destinations(data):\n    \"\"\"Put clean data where users can access it\"\"\"\n    \n    # Production database\n    data.to_sql(\n        'customer_metrics',\n        production_db,\n        if_exists='replace',\n        index=False\n    )\n    \n    # Data warehouse\n    data.to_parquet('s3:\u002F\u002Fwarehouse\u002Fcustomer_metrics\u002F')\n    \n    # For reports\n    data.to_csv('s3:\u002F\u002Freports\u002Fcustomer_metrics_latest.csv')\n    \n    # Cache for dashboards\n    cache.set('customer_metrics', data.to_json())\n",[1229],{"type":14,"tag":49,"props":1230,"children":1231},{"__ignoreMap":7},[1232,1240,1248,1255,1263,1271,1279,1287,1295,1303,1310,1317,1325,1333,1340,1348,1356,1363,1371],{"type":14,"tag":53,"props":1233,"children":1234},{"class":55,"line":56},[1235],{"type":14,"tag":53,"props":1236,"children":1237},{},[1238],{"type":20,"value":1239},"def load_to_destinations(data):\n",{"type":14,"tag":53,"props":1241,"children":1242},{"class":55,"line":65},[1243],{"type":14,"tag":53,"props":1244,"children":1245},{},[1246],{"type":20,"value":1247},"    \"\"\"Put clean data where users can access it\"\"\"\n",{"type":14,"tag":53,"props":1249,"children":1250},{"class":55,"line":74},[1251],{"type":14,"tag":53,"props":1252,"children":1253},{},[1254],{"type":20,"value":98},{"type":14,"tag":53,"props":1256,"children":1257},{"class":55,"line":83},[1258],{"type":14,"tag":53,"props":1259,"children":1260},{},[1261],{"type":20,"value":1262},"    # Production database\n",{"type":14,"tag":53,"props":1264,"children":1265},{"class":55,"line":92},[1266],{"type":14,"tag":53,"props":1267,"children":1268},{},[1269],{"type":20,"value":1270},"    data.to_sql(\n",{"type":14,"tag":53,"props":1272,"children":1273},{"class":55,"line":101},[1274],{"type":14,"tag":53,"props":1275,"children":1276},{},[1277],{"type":20,"value":1278},"        'customer_metrics',\n",{"type":14,"tag":53,"props":1280,"children":1281},{"class":55,"line":110},[1282],{"type":14,"tag":53,"props":1283,"children":1284},{},[1285],{"type":20,"value":1286},"        production_db,\n",{"type":14,"tag":53,"props":1288,"children":1289},{"class":55,"line":119},[1290],{"type":14,"tag":53,"props":1291,"children":1292},{},[1293],{"type":20,"value":1294},"        if_exists='replace',\n",{"type":14,"tag":53,"props":1296,"children":1297},{"class":55,"line":219},[1298],{"type":14,"tag":53,"props":1299,"children":1300},{},[1301],{"type":20,"value":1302},"        index=False\n",{"type":14,"tag":53,"props":1304,"children":1305},{"class":55,"line":228},[1306],{"type":14,"tag":53,"props":1307,"children":1308},{},[1309],{"type":20,"value":869},{"type":14,"tag":53,"props":1311,"children":1312},{"class":55,"line":236},[1313],{"type":14,"tag":53,"props":1314,"children":1315},{},[1316],{"type":20,"value":98},{"type":14,"tag":53,"props":1318,"children":1319},{"class":55,"line":245},[1320],{"type":14,"tag":53,"props":1321,"children":1322},{},[1323],{"type":20,"value":1324},"    # Data warehouse\n",{"type":14,"tag":53,"props":1326,"children":1327},{"class":55,"line":254},[1328],{"type":14,"tag":53,"props":1329,"children":1330},{},[1331],{"type":20,"value":1332},"    data.to_parquet('s3:\u002F\u002Fwarehouse\u002Fcustomer_metrics\u002F')\n",{"type":14,"tag":53,"props":1334,"children":1335},{"class":55,"line":263},[1336],{"type":14,"tag":53,"props":1337,"children":1338},{},[1339],{"type":20,"value":98},{"type":14,"tag":53,"props":1341,"children":1342},{"class":55,"line":271},[1343],{"type":14,"tag":53,"props":1344,"children":1345},{},[1346],{"type":20,"value":1347},"    # For reports\n",{"type":14,"tag":53,"props":1349,"children":1350},{"class":55,"line":280},[1351],{"type":14,"tag":53,"props":1352,"children":1353},{},[1354],{"type":20,"value":1355},"    data.to_csv('s3:\u002F\u002Freports\u002Fcustomer_metrics_latest.csv')\n",{"type":14,"tag":53,"props":1357,"children":1358},{"class":55,"line":289},[1359],{"type":14,"tag":53,"props":1360,"children":1361},{},[1362],{"type":20,"value":98},{"type":14,"tag":53,"props":1364,"children":1365},{"class":55,"line":298},[1366],{"type":14,"tag":53,"props":1367,"children":1368},{},[1369],{"type":20,"value":1370},"    # Cache for dashboards\n",{"type":14,"tag":53,"props":1372,"children":1373},{"class":55,"line":307},[1374],{"type":14,"tag":53,"props":1375,"children":1376},{},[1377],{"type":20,"value":1378},"    cache.set('customer_metrics', data.to_json())\n",{"type":14,"tag":35,"props":1380,"children":1382},{"id":1381},"generate-reports",[1383],{"type":20,"value":1384},"Generate Reports",{"type":14,"tag":42,"props":1386,"children":1388},{"code":1387,"language":146,"meta":7,"className":147,"style":7},"def generate_report(data):\n    \"\"\"Create human-readable report\"\"\"\n    \n    report = f\"\"\"\n    Customer Metrics Report\n    Generated: {pd.Timestamp.now()}\n    \n    Total Customers: {len(data)}\n    Total Revenue: ${data['revenue'].sum():,.0f}\n    Average Order Value: ${data['revenue'].mean():.2f}\n    \n    Top 5 Regions by Revenue:\n    {data.groupby('region')['revenue'].sum().nlargest(5).to_string()}\n    \n    Alerts:\n    - {len(data[data['revenue'] \u003C 0])} rows with negative revenue\n    - {data['created_at'].isnull().sum()} rows missing dates\n    \"\"\"\n    \n    return report\n\n# Send to stakeholders\nreport = generate_report(processed_data)\nsend_email(to='team@company.com', subject='Daily Metrics', body=report)\n",[1389],{"type":14,"tag":49,"props":1390,"children":1391},{"__ignoreMap":7},[1392,1400,1408,1415,1423,1431,1439,1446,1454,1462,1470,1477,1485,1493,1500,1508,1516,1524,1532,1539,1547,1554,1562,1570],{"type":14,"tag":53,"props":1393,"children":1394},{"class":55,"line":56},[1395],{"type":14,"tag":53,"props":1396,"children":1397},{},[1398],{"type":20,"value":1399},"def generate_report(data):\n",{"type":14,"tag":53,"props":1401,"children":1402},{"class":55,"line":65},[1403],{"type":14,"tag":53,"props":1404,"children":1405},{},[1406],{"type":20,"value":1407},"    \"\"\"Create human-readable report\"\"\"\n",{"type":14,"tag":53,"props":1409,"children":1410},{"class":55,"line":74},[1411],{"type":14,"tag":53,"props":1412,"children":1413},{},[1414],{"type":20,"value":98},{"type":14,"tag":53,"props":1416,"children":1417},{"class":55,"line":83},[1418],{"type":14,"tag":53,"props":1419,"children":1420},{},[1421],{"type":20,"value":1422},"    report = f\"\"\"\n",{"type":14,"tag":53,"props":1424,"children":1425},{"class":55,"line":92},[1426],{"type":14,"tag":53,"props":1427,"children":1428},{},[1429],{"type":20,"value":1430},"    Customer Metrics Report\n",{"type":14,"tag":53,"props":1432,"children":1433},{"class":55,"line":101},[1434],{"type":14,"tag":53,"props":1435,"children":1436},{},[1437],{"type":20,"value":1438},"    Generated: {pd.Timestamp.now()}\n",{"type":14,"tag":53,"props":1440,"children":1441},{"class":55,"line":110},[1442],{"type":14,"tag":53,"props":1443,"children":1444},{},[1445],{"type":20,"value":98},{"type":14,"tag":53,"props":1447,"children":1448},{"class":55,"line":119},[1449],{"type":14,"tag":53,"props":1450,"children":1451},{},[1452],{"type":20,"value":1453},"    Total Customers: {len(data)}\n",{"type":14,"tag":53,"props":1455,"children":1456},{"class":55,"line":219},[1457],{"type":14,"tag":53,"props":1458,"children":1459},{},[1460],{"type":20,"value":1461},"    Total Revenue: ${data['revenue'].sum():,.0f}\n",{"type":14,"tag":53,"props":1463,"children":1464},{"class":55,"line":228},[1465],{"type":14,"tag":53,"props":1466,"children":1467},{},[1468],{"type":20,"value":1469},"    Average Order Value: ${data['revenue'].mean():.2f}\n",{"type":14,"tag":53,"props":1471,"children":1472},{"class":55,"line":236},[1473],{"type":14,"tag":53,"props":1474,"children":1475},{},[1476],{"type":20,"value":98},{"type":14,"tag":53,"props":1478,"children":1479},{"class":55,"line":245},[1480],{"type":14,"tag":53,"props":1481,"children":1482},{},[1483],{"type":20,"value":1484},"    Top 5 Regions by Revenue:\n",{"type":14,"tag":53,"props":1486,"children":1487},{"class":55,"line":254},[1488],{"type":14,"tag":53,"props":1489,"children":1490},{},[1491],{"type":20,"value":1492},"    {data.groupby('region')['revenue'].sum().nlargest(5).to_string()}\n",{"type":14,"tag":53,"props":1494,"children":1495},{"class":55,"line":263},[1496],{"type":14,"tag":53,"props":1497,"children":1498},{},[1499],{"type":20,"value":98},{"type":14,"tag":53,"props":1501,"children":1502},{"class":55,"line":271},[1503],{"type":14,"tag":53,"props":1504,"children":1505},{},[1506],{"type":20,"value":1507},"    Alerts:\n",{"type":14,"tag":53,"props":1509,"children":1510},{"class":55,"line":280},[1511],{"type":14,"tag":53,"props":1512,"children":1513},{},[1514],{"type":20,"value":1515},"    - {len(data[data['revenue'] \u003C 0])} rows with negative revenue\n",{"type":14,"tag":53,"props":1517,"children":1518},{"class":55,"line":289},[1519],{"type":14,"tag":53,"props":1520,"children":1521},{},[1522],{"type":20,"value":1523},"    - {data['created_at'].isnull().sum()} rows missing dates\n",{"type":14,"tag":53,"props":1525,"children":1526},{"class":55,"line":298},[1527],{"type":14,"tag":53,"props":1528,"children":1529},{},[1530],{"type":20,"value":1531},"    \"\"\"\n",{"type":14,"tag":53,"props":1533,"children":1534},{"class":55,"line":307},[1535],{"type":14,"tag":53,"props":1536,"children":1537},{},[1538],{"type":20,"value":98},{"type":14,"tag":53,"props":1540,"children":1541},{"class":55,"line":476},[1542],{"type":14,"tag":53,"props":1543,"children":1544},{},[1545],{"type":20,"value":1546},"    return report\n",{"type":14,"tag":53,"props":1548,"children":1549},{"class":55,"line":485},[1550],{"type":14,"tag":53,"props":1551,"children":1552},{"emptyLinePlaceholder":189},[1553],{"type":20,"value":192},{"type":14,"tag":53,"props":1555,"children":1556},{"class":55,"line":494},[1557],{"type":14,"tag":53,"props":1558,"children":1559},{},[1560],{"type":20,"value":1561},"# Send to stakeholders\n",{"type":14,"tag":53,"props":1563,"children":1564},{"class":55,"line":1155},[1565],{"type":14,"tag":53,"props":1566,"children":1567},{},[1568],{"type":20,"value":1569},"report = generate_report(processed_data)\n",{"type":14,"tag":53,"props":1571,"children":1572},{"class":55,"line":1163},[1573],{"type":14,"tag":53,"props":1574,"children":1575},{},[1576],{"type":20,"value":1577},"send_email(to='team@company.com', subject='Daily Metrics', body=report)\n",{"type":14,"tag":28,"props":1579,"children":1581},{"id":1580},"complete-pipeline-example",[1582],{"type":20,"value":1583},"Complete Pipeline Example",{"type":14,"tag":35,"props":1585,"children":1587},{"id":1586},"daily-customer-metrics-pipeline",[1588],{"type":20,"value":1589},"Daily Customer Metrics Pipeline",{"type":14,"tag":42,"props":1591,"children":1593},{"code":1592,"language":146,"meta":7,"className":147,"style":7},"# ORCHESTRATION: Schedule daily at 9 AM\n\ndef run_daily_pipeline():\n    \"\"\"Complete data pipeline\"\"\"\n    \n    # INGESTION LAYER\n    print(\"Step 1: Extracting data...\")\n    raw_customers = extract_from_postgres()\n    raw_orders = extract_from_api()\n    validate_ingestion(raw_customers)\n    validate_ingestion(raw_orders)\n    \n    # TRANSFORMATION LAYER\n    print(\"Step 2: Cleaning data...\")\n    clean_customers = clean_data(raw_customers)\n    clean_orders = clean_data(raw_orders)\n    \n    print(\"Step 3: Validating quality...\")\n    validate_data_quality(clean_customers)\n    validate_data_quality(clean_orders)\n    \n    print(\"Step 4: Enriching data...\")\n    merged = clean_customers.merge(\n        clean_orders,\n        on='customer_id',\n        how='left'\n    )\n    enriched = enrich_data(merged)\n    \n    print(\"Step 5: Aggregating...\")\n    aggregated = aggregate_data(enriched)\n    \n    # PRESENTATION LAYER\n    print(\"Step 6: Loading to destinations...\")\n    load_to_destinations(aggregated['daily'])\n    \n    print(\"Step 7: Generating reports...\")\n    report = generate_report(aggregated['daily'])\n    send_email('team@company.com', report)\n    \n    print(\"✓ Pipeline completed successfully\")\n\n# Result: 9:15 AM - Team has fresh data and reports!\n",[1594],{"type":14,"tag":49,"props":1595,"children":1596},{"__ignoreMap":7},[1597,1605,1612,1620,1628,1635,1643,1651,1659,1667,1675,1683,1690,1698,1706,1714,1722,1729,1737,1745,1753,1760,1768,1776,1784,1791,1798,1805,1813,1820,1829,1838,1846,1855,1864,1873,1881,1890,1899,1908,1916,1925,1933],{"type":14,"tag":53,"props":1598,"children":1599},{"class":55,"line":56},[1600],{"type":14,"tag":53,"props":1601,"children":1602},{},[1603],{"type":20,"value":1604},"# ORCHESTRATION: Schedule daily at 9 AM\n",{"type":14,"tag":53,"props":1606,"children":1607},{"class":55,"line":65},[1608],{"type":14,"tag":53,"props":1609,"children":1610},{"emptyLinePlaceholder":189},[1611],{"type":20,"value":192},{"type":14,"tag":53,"props":1613,"children":1614},{"class":55,"line":74},[1615],{"type":14,"tag":53,"props":1616,"children":1617},{},[1618],{"type":20,"value":1619},"def run_daily_pipeline():\n",{"type":14,"tag":53,"props":1621,"children":1622},{"class":55,"line":83},[1623],{"type":14,"tag":53,"props":1624,"children":1625},{},[1626],{"type":20,"value":1627},"    \"\"\"Complete data pipeline\"\"\"\n",{"type":14,"tag":53,"props":1629,"children":1630},{"class":55,"line":92},[1631],{"type":14,"tag":53,"props":1632,"children":1633},{},[1634],{"type":20,"value":98},{"type":14,"tag":53,"props":1636,"children":1637},{"class":55,"line":101},[1638],{"type":14,"tag":53,"props":1639,"children":1640},{},[1641],{"type":20,"value":1642},"    # INGESTION LAYER\n",{"type":14,"tag":53,"props":1644,"children":1645},{"class":55,"line":110},[1646],{"type":14,"tag":53,"props":1647,"children":1648},{},[1649],{"type":20,"value":1650},"    print(\"Step 1: Extracting data...\")\n",{"type":14,"tag":53,"props":1652,"children":1653},{"class":55,"line":119},[1654],{"type":14,"tag":53,"props":1655,"children":1656},{},[1657],{"type":20,"value":1658},"    raw_customers = extract_from_postgres()\n",{"type":14,"tag":53,"props":1660,"children":1661},{"class":55,"line":219},[1662],{"type":14,"tag":53,"props":1663,"children":1664},{},[1665],{"type":20,"value":1666},"    raw_orders = extract_from_api()\n",{"type":14,"tag":53,"props":1668,"children":1669},{"class":55,"line":228},[1670],{"type":14,"tag":53,"props":1671,"children":1672},{},[1673],{"type":20,"value":1674},"    validate_ingestion(raw_customers)\n",{"type":14,"tag":53,"props":1676,"children":1677},{"class":55,"line":236},[1678],{"type":14,"tag":53,"props":1679,"children":1680},{},[1681],{"type":20,"value":1682},"    validate_ingestion(raw_orders)\n",{"type":14,"tag":53,"props":1684,"children":1685},{"class":55,"line":245},[1686],{"type":14,"tag":53,"props":1687,"children":1688},{},[1689],{"type":20,"value":98},{"type":14,"tag":53,"props":1691,"children":1692},{"class":55,"line":254},[1693],{"type":14,"tag":53,"props":1694,"children":1695},{},[1696],{"type":20,"value":1697},"    # TRANSFORMATION LAYER\n",{"type":14,"tag":53,"props":1699,"children":1700},{"class":55,"line":263},[1701],{"type":14,"tag":53,"props":1702,"children":1703},{},[1704],{"type":20,"value":1705},"    print(\"Step 2: Cleaning data...\")\n",{"type":14,"tag":53,"props":1707,"children":1708},{"class":55,"line":271},[1709],{"type":14,"tag":53,"props":1710,"children":1711},{},[1712],{"type":20,"value":1713},"    clean_customers = clean_data(raw_customers)\n",{"type":14,"tag":53,"props":1715,"children":1716},{"class":55,"line":280},[1717],{"type":14,"tag":53,"props":1718,"children":1719},{},[1720],{"type":20,"value":1721},"    clean_orders = clean_data(raw_orders)\n",{"type":14,"tag":53,"props":1723,"children":1724},{"class":55,"line":289},[1725],{"type":14,"tag":53,"props":1726,"children":1727},{},[1728],{"type":20,"value":98},{"type":14,"tag":53,"props":1730,"children":1731},{"class":55,"line":298},[1732],{"type":14,"tag":53,"props":1733,"children":1734},{},[1735],{"type":20,"value":1736},"    print(\"Step 3: Validating quality...\")\n",{"type":14,"tag":53,"props":1738,"children":1739},{"class":55,"line":307},[1740],{"type":14,"tag":53,"props":1741,"children":1742},{},[1743],{"type":20,"value":1744},"    validate_data_quality(clean_customers)\n",{"type":14,"tag":53,"props":1746,"children":1747},{"class":55,"line":476},[1748],{"type":14,"tag":53,"props":1749,"children":1750},{},[1751],{"type":20,"value":1752},"    validate_data_quality(clean_orders)\n",{"type":14,"tag":53,"props":1754,"children":1755},{"class":55,"line":485},[1756],{"type":14,"tag":53,"props":1757,"children":1758},{},[1759],{"type":20,"value":98},{"type":14,"tag":53,"props":1761,"children":1762},{"class":55,"line":494},[1763],{"type":14,"tag":53,"props":1764,"children":1765},{},[1766],{"type":20,"value":1767},"    print(\"Step 4: Enriching data...\")\n",{"type":14,"tag":53,"props":1769,"children":1770},{"class":55,"line":1155},[1771],{"type":14,"tag":53,"props":1772,"children":1773},{},[1774],{"type":20,"value":1775},"    merged = clean_customers.merge(\n",{"type":14,"tag":53,"props":1777,"children":1778},{"class":55,"line":1163},[1779],{"type":14,"tag":53,"props":1780,"children":1781},{},[1782],{"type":20,"value":1783},"        clean_orders,\n",{"type":14,"tag":53,"props":1785,"children":1786},{"class":55,"line":1171},[1787],{"type":14,"tag":53,"props":1788,"children":1789},{},[1790],{"type":20,"value":853},{"type":14,"tag":53,"props":1792,"children":1793},{"class":55,"line":1180},[1794],{"type":14,"tag":53,"props":1795,"children":1796},{},[1797],{"type":20,"value":861},{"type":14,"tag":53,"props":1799,"children":1800},{"class":55,"line":1189},[1801],{"type":14,"tag":53,"props":1802,"children":1803},{},[1804],{"type":20,"value":869},{"type":14,"tag":53,"props":1806,"children":1807},{"class":55,"line":1198},[1808],{"type":14,"tag":53,"props":1809,"children":1810},{},[1811],{"type":20,"value":1812},"    enriched = enrich_data(merged)\n",{"type":14,"tag":53,"props":1814,"children":1815},{"class":55,"line":1207},[1816],{"type":14,"tag":53,"props":1817,"children":1818},{},[1819],{"type":20,"value":98},{"type":14,"tag":53,"props":1821,"children":1823},{"class":55,"line":1822},30,[1824],{"type":14,"tag":53,"props":1825,"children":1826},{},[1827],{"type":20,"value":1828},"    print(\"Step 5: Aggregating...\")\n",{"type":14,"tag":53,"props":1830,"children":1832},{"class":55,"line":1831},31,[1833],{"type":14,"tag":53,"props":1834,"children":1835},{},[1836],{"type":20,"value":1837},"    aggregated = aggregate_data(enriched)\n",{"type":14,"tag":53,"props":1839,"children":1841},{"class":55,"line":1840},32,[1842],{"type":14,"tag":53,"props":1843,"children":1844},{},[1845],{"type":20,"value":98},{"type":14,"tag":53,"props":1847,"children":1849},{"class":55,"line":1848},33,[1850],{"type":14,"tag":53,"props":1851,"children":1852},{},[1853],{"type":20,"value":1854},"    # PRESENTATION LAYER\n",{"type":14,"tag":53,"props":1856,"children":1858},{"class":55,"line":1857},34,[1859],{"type":14,"tag":53,"props":1860,"children":1861},{},[1862],{"type":20,"value":1863},"    print(\"Step 6: Loading to destinations...\")\n",{"type":14,"tag":53,"props":1865,"children":1867},{"class":55,"line":1866},35,[1868],{"type":14,"tag":53,"props":1869,"children":1870},{},[1871],{"type":20,"value":1872},"    load_to_destinations(aggregated['daily'])\n",{"type":14,"tag":53,"props":1874,"children":1876},{"class":55,"line":1875},36,[1877],{"type":14,"tag":53,"props":1878,"children":1879},{},[1880],{"type":20,"value":98},{"type":14,"tag":53,"props":1882,"children":1884},{"class":55,"line":1883},37,[1885],{"type":14,"tag":53,"props":1886,"children":1887},{},[1888],{"type":20,"value":1889},"    print(\"Step 7: Generating reports...\")\n",{"type":14,"tag":53,"props":1891,"children":1893},{"class":55,"line":1892},38,[1894],{"type":14,"tag":53,"props":1895,"children":1896},{},[1897],{"type":20,"value":1898},"    report = generate_report(aggregated['daily'])\n",{"type":14,"tag":53,"props":1900,"children":1902},{"class":55,"line":1901},39,[1903],{"type":14,"tag":53,"props":1904,"children":1905},{},[1906],{"type":20,"value":1907},"    send_email('team@company.com', report)\n",{"type":14,"tag":53,"props":1909,"children":1911},{"class":55,"line":1910},40,[1912],{"type":14,"tag":53,"props":1913,"children":1914},{},[1915],{"type":20,"value":98},{"type":14,"tag":53,"props":1917,"children":1919},{"class":55,"line":1918},41,[1920],{"type":14,"tag":53,"props":1921,"children":1922},{},[1923],{"type":20,"value":1924},"    print(\"✓ Pipeline completed successfully\")\n",{"type":14,"tag":53,"props":1926,"children":1928},{"class":55,"line":1927},42,[1929],{"type":14,"tag":53,"props":1930,"children":1931},{"emptyLinePlaceholder":189},[1932],{"type":20,"value":192},{"type":14,"tag":53,"props":1934,"children":1936},{"class":55,"line":1935},43,[1937],{"type":14,"tag":53,"props":1938,"children":1939},{},[1940],{"type":20,"value":1941},"# Result: 9:15 AM - Team has fresh data and reports!\n",{"type":14,"tag":28,"props":1943,"children":1945},{"id":1944},"error-handling",[1946],{"type":20,"value":1947},"Error Handling",{"type":14,"tag":35,"props":1949,"children":1951},{"id":1950},"graceful-failures",[1952],{"type":20,"value":1953},"Graceful Failures",{"type":14,"tag":42,"props":1955,"children":1957},{"code":1956,"language":146,"meta":7,"className":147,"style":7},"def pipeline_with_retry(max_retries=3):\n    \"\"\"Retry failed steps\"\"\"\n    \n    for attempt in range(max_retries):\n        try:\n            data = extract_from_source()\n            validate_ingestion(data)\n            break  # Success\n        except Exception as e:\n            print(f\"Attempt {attempt + 1} failed: {e}\")\n            if attempt \u003C max_retries - 1:\n                wait_time = 2 ** attempt  # 1s, 2s, 4s\n                print(f\"Retrying in {wait_time} seconds...\")\n                time.sleep(wait_time)\n            else:\n                raise  # Give up after max retries\n\n# In Orchestration, set retry policy:\n# max_retries: 3\n# wait_between: exponential backoff\n",[1958],{"type":14,"tag":49,"props":1959,"children":1960},{"__ignoreMap":7},[1961,1969,1977,1984,1992,2000,2008,2016,2024,2032,2040,2048,2056,2064,2072,2080,2088,2095,2103,2111],{"type":14,"tag":53,"props":1962,"children":1963},{"class":55,"line":56},[1964],{"type":14,"tag":53,"props":1965,"children":1966},{},[1967],{"type":20,"value":1968},"def pipeline_with_retry(max_retries=3):\n",{"type":14,"tag":53,"props":1970,"children":1971},{"class":55,"line":65},[1972],{"type":14,"tag":53,"props":1973,"children":1974},{},[1975],{"type":20,"value":1976},"    \"\"\"Retry failed steps\"\"\"\n",{"type":14,"tag":53,"props":1978,"children":1979},{"class":55,"line":74},[1980],{"type":14,"tag":53,"props":1981,"children":1982},{},[1983],{"type":20,"value":98},{"type":14,"tag":53,"props":1985,"children":1986},{"class":55,"line":83},[1987],{"type":14,"tag":53,"props":1988,"children":1989},{},[1990],{"type":20,"value":1991},"    for attempt in range(max_retries):\n",{"type":14,"tag":53,"props":1993,"children":1994},{"class":55,"line":92},[1995],{"type":14,"tag":53,"props":1996,"children":1997},{},[1998],{"type":20,"value":1999},"        try:\n",{"type":14,"tag":53,"props":2001,"children":2002},{"class":55,"line":101},[2003],{"type":14,"tag":53,"props":2004,"children":2005},{},[2006],{"type":20,"value":2007},"            data = extract_from_source()\n",{"type":14,"tag":53,"props":2009,"children":2010},{"class":55,"line":110},[2011],{"type":14,"tag":53,"props":2012,"children":2013},{},[2014],{"type":20,"value":2015},"            validate_ingestion(data)\n",{"type":14,"tag":53,"props":2017,"children":2018},{"class":55,"line":119},[2019],{"type":14,"tag":53,"props":2020,"children":2021},{},[2022],{"type":20,"value":2023},"            break  # Success\n",{"type":14,"tag":53,"props":2025,"children":2026},{"class":55,"line":219},[2027],{"type":14,"tag":53,"props":2028,"children":2029},{},[2030],{"type":20,"value":2031},"        except Exception as e:\n",{"type":14,"tag":53,"props":2033,"children":2034},{"class":55,"line":228},[2035],{"type":14,"tag":53,"props":2036,"children":2037},{},[2038],{"type":20,"value":2039},"            print(f\"Attempt {attempt + 1} failed: {e}\")\n",{"type":14,"tag":53,"props":2041,"children":2042},{"class":55,"line":236},[2043],{"type":14,"tag":53,"props":2044,"children":2045},{},[2046],{"type":20,"value":2047},"            if attempt \u003C max_retries - 1:\n",{"type":14,"tag":53,"props":2049,"children":2050},{"class":55,"line":245},[2051],{"type":14,"tag":53,"props":2052,"children":2053},{},[2054],{"type":20,"value":2055},"                wait_time = 2 ** attempt  # 1s, 2s, 4s\n",{"type":14,"tag":53,"props":2057,"children":2058},{"class":55,"line":254},[2059],{"type":14,"tag":53,"props":2060,"children":2061},{},[2062],{"type":20,"value":2063},"                print(f\"Retrying in {wait_time} seconds...\")\n",{"type":14,"tag":53,"props":2065,"children":2066},{"class":55,"line":263},[2067],{"type":14,"tag":53,"props":2068,"children":2069},{},[2070],{"type":20,"value":2071},"                time.sleep(wait_time)\n",{"type":14,"tag":53,"props":2073,"children":2074},{"class":55,"line":271},[2075],{"type":14,"tag":53,"props":2076,"children":2077},{},[2078],{"type":20,"value":2079},"            else:\n",{"type":14,"tag":53,"props":2081,"children":2082},{"class":55,"line":280},[2083],{"type":14,"tag":53,"props":2084,"children":2085},{},[2086],{"type":20,"value":2087},"                raise  # Give up after max retries\n",{"type":14,"tag":53,"props":2089,"children":2090},{"class":55,"line":289},[2091],{"type":14,"tag":53,"props":2092,"children":2093},{"emptyLinePlaceholder":189},[2094],{"type":20,"value":192},{"type":14,"tag":53,"props":2096,"children":2097},{"class":55,"line":298},[2098],{"type":14,"tag":53,"props":2099,"children":2100},{},[2101],{"type":20,"value":2102},"# In Orchestration, set retry policy:\n",{"type":14,"tag":53,"props":2104,"children":2105},{"class":55,"line":307},[2106],{"type":14,"tag":53,"props":2107,"children":2108},{},[2109],{"type":20,"value":2110},"# max_retries: 3\n",{"type":14,"tag":53,"props":2112,"children":2113},{"class":55,"line":476},[2114],{"type":14,"tag":53,"props":2115,"children":2116},{},[2117],{"type":20,"value":2118},"# wait_between: exponential backoff\n",{"type":14,"tag":35,"props":2120,"children":2122},{"id":2121},"alerting-on-failures",[2123],{"type":20,"value":2124},"Alerting on Failures",{"type":14,"tag":42,"props":2126,"children":2128},{"code":2127,"language":146,"meta":7,"className":147,"style":7},"def run_pipeline_with_alerts():\n    \"\"\"Alert stakeholders if pipeline fails\"\"\"\n    \n    try:\n        run_daily_pipeline()\n    except Exception as e:\n        # Pipeline failed\n        alert = {\n            'severity': 'critical',\n            'title': 'Daily Pipeline Failed',\n            'error': str(e),\n            'time': pd.Timestamp.now()\n        }\n        \n        # Send alerts to different channels\n        send_email('ops@company.com', json.dumps(alert))\n        send_slack('#data-alerts', alert)\n        send_pagerduty(alert)\n        \n        # And fail the pipeline step\n        raise\n",[2129],{"type":14,"tag":49,"props":2130,"children":2131},{"__ignoreMap":7},[2132,2140,2148,2155,2163,2171,2179,2187,2195,2203,2211,2219,2227,2235,2243,2251,2259,2267,2275,2282,2290],{"type":14,"tag":53,"props":2133,"children":2134},{"class":55,"line":56},[2135],{"type":14,"tag":53,"props":2136,"children":2137},{},[2138],{"type":20,"value":2139},"def run_pipeline_with_alerts():\n",{"type":14,"tag":53,"props":2141,"children":2142},{"class":55,"line":65},[2143],{"type":14,"tag":53,"props":2144,"children":2145},{},[2146],{"type":20,"value":2147},"    \"\"\"Alert stakeholders if pipeline fails\"\"\"\n",{"type":14,"tag":53,"props":2149,"children":2150},{"class":55,"line":74},[2151],{"type":14,"tag":53,"props":2152,"children":2153},{},[2154],{"type":20,"value":98},{"type":14,"tag":53,"props":2156,"children":2157},{"class":55,"line":83},[2158],{"type":14,"tag":53,"props":2159,"children":2160},{},[2161],{"type":20,"value":2162},"    try:\n",{"type":14,"tag":53,"props":2164,"children":2165},{"class":55,"line":92},[2166],{"type":14,"tag":53,"props":2167,"children":2168},{},[2169],{"type":20,"value":2170},"        run_daily_pipeline()\n",{"type":14,"tag":53,"props":2172,"children":2173},{"class":55,"line":101},[2174],{"type":14,"tag":53,"props":2175,"children":2176},{},[2177],{"type":20,"value":2178},"    except Exception as e:\n",{"type":14,"tag":53,"props":2180,"children":2181},{"class":55,"line":110},[2182],{"type":14,"tag":53,"props":2183,"children":2184},{},[2185],{"type":20,"value":2186},"        # Pipeline failed\n",{"type":14,"tag":53,"props":2188,"children":2189},{"class":55,"line":119},[2190],{"type":14,"tag":53,"props":2191,"children":2192},{},[2193],{"type":20,"value":2194},"        alert = {\n",{"type":14,"tag":53,"props":2196,"children":2197},{"class":55,"line":219},[2198],{"type":14,"tag":53,"props":2199,"children":2200},{},[2201],{"type":20,"value":2202},"            'severity': 'critical',\n",{"type":14,"tag":53,"props":2204,"children":2205},{"class":55,"line":228},[2206],{"type":14,"tag":53,"props":2207,"children":2208},{},[2209],{"type":20,"value":2210},"            'title': 'Daily Pipeline Failed',\n",{"type":14,"tag":53,"props":2212,"children":2213},{"class":55,"line":236},[2214],{"type":14,"tag":53,"props":2215,"children":2216},{},[2217],{"type":20,"value":2218},"            'error': str(e),\n",{"type":14,"tag":53,"props":2220,"children":2221},{"class":55,"line":245},[2222],{"type":14,"tag":53,"props":2223,"children":2224},{},[2225],{"type":20,"value":2226},"            'time': pd.Timestamp.now()\n",{"type":14,"tag":53,"props":2228,"children":2229},{"class":55,"line":254},[2230],{"type":14,"tag":53,"props":2231,"children":2232},{},[2233],{"type":20,"value":2234},"        }\n",{"type":14,"tag":53,"props":2236,"children":2237},{"class":55,"line":263},[2238],{"type":14,"tag":53,"props":2239,"children":2240},{},[2241],{"type":20,"value":2242},"        \n",{"type":14,"tag":53,"props":2244,"children":2245},{"class":55,"line":271},[2246],{"type":14,"tag":53,"props":2247,"children":2248},{},[2249],{"type":20,"value":2250},"        # Send alerts to different channels\n",{"type":14,"tag":53,"props":2252,"children":2253},{"class":55,"line":280},[2254],{"type":14,"tag":53,"props":2255,"children":2256},{},[2257],{"type":20,"value":2258},"        send_email('ops@company.com', json.dumps(alert))\n",{"type":14,"tag":53,"props":2260,"children":2261},{"class":55,"line":289},[2262],{"type":14,"tag":53,"props":2263,"children":2264},{},[2265],{"type":20,"value":2266},"        send_slack('#data-alerts', alert)\n",{"type":14,"tag":53,"props":2268,"children":2269},{"class":55,"line":298},[2270],{"type":14,"tag":53,"props":2271,"children":2272},{},[2273],{"type":20,"value":2274},"        send_pagerduty(alert)\n",{"type":14,"tag":53,"props":2276,"children":2277},{"class":55,"line":307},[2278],{"type":14,"tag":53,"props":2279,"children":2280},{},[2281],{"type":20,"value":2242},{"type":14,"tag":53,"props":2283,"children":2284},{"class":55,"line":476},[2285],{"type":14,"tag":53,"props":2286,"children":2287},{},[2288],{"type":20,"value":2289},"        # And fail the pipeline step\n",{"type":14,"tag":53,"props":2291,"children":2292},{"class":55,"line":485},[2293],{"type":14,"tag":53,"props":2294,"children":2295},{},[2296],{"type":20,"value":2297},"        raise\n",{"type":14,"tag":28,"props":2299,"children":2301},{"id":2300},"monitoring-pipeline-health",[2302],{"type":20,"value":2303},"Monitoring Pipeline Health",{"type":14,"tag":35,"props":2305,"children":2307},{"id":2306},"track-pipeline-metrics",[2308],{"type":20,"value":2309},"Track Pipeline Metrics",{"type":14,"tag":42,"props":2311,"children":2313},{"code":2312,"language":146,"meta":7,"className":147,"style":7},"from credvault import metrics\n\ndef monitored_pipeline():\n    \"\"\"Pipeline with monitoring\"\"\"\n    \n    # Track execution time\n    start = time.time()\n    \n    try:\n        data = extract()\n        metrics.gauge('extraction_rows', len(data))\n        \n        clean = clean_data(data)\n        metrics.gauge('rows_after_cleaning', len(clean))\n        metrics.gauge('rows_removed', len(data) - len(clean))\n        \n        validated = validate_quality(clean)\n        metrics.counter('validation_passed', 1)\n        \n        load_data(validated)\n        metrics.counter('successful_loads', 1)\n        \n    except Exception as e:\n        metrics.counter('pipeline_failures', 1)\n        raise\n    \n    finally:\n        duration = time.time() - start\n        metrics.histogram('pipeline_duration_seconds', duration)\n        print(f\"Pipeline took {duration:.1f} seconds\")\n",[2314],{"type":14,"tag":49,"props":2315,"children":2316},{"__ignoreMap":7},[2317,2325,2332,2340,2348,2355,2363,2371,2378,2385,2393,2401,2408,2416,2424,2432,2439,2447,2455,2462,2470,2478,2485,2492,2500,2507,2514,2522,2530,2538],{"type":14,"tag":53,"props":2318,"children":2319},{"class":55,"line":56},[2320],{"type":14,"tag":53,"props":2321,"children":2322},{},[2323],{"type":20,"value":2324},"from credvault import metrics\n",{"type":14,"tag":53,"props":2326,"children":2327},{"class":55,"line":65},[2328],{"type":14,"tag":53,"props":2329,"children":2330},{"emptyLinePlaceholder":189},[2331],{"type":20,"value":192},{"type":14,"tag":53,"props":2333,"children":2334},{"class":55,"line":74},[2335],{"type":14,"tag":53,"props":2336,"children":2337},{},[2338],{"type":20,"value":2339},"def monitored_pipeline():\n",{"type":14,"tag":53,"props":2341,"children":2342},{"class":55,"line":83},[2343],{"type":14,"tag":53,"props":2344,"children":2345},{},[2346],{"type":20,"value":2347},"    \"\"\"Pipeline with monitoring\"\"\"\n",{"type":14,"tag":53,"props":2349,"children":2350},{"class":55,"line":92},[2351],{"type":14,"tag":53,"props":2352,"children":2353},{},[2354],{"type":20,"value":98},{"type":14,"tag":53,"props":2356,"children":2357},{"class":55,"line":101},[2358],{"type":14,"tag":53,"props":2359,"children":2360},{},[2361],{"type":20,"value":2362},"    # Track execution time\n",{"type":14,"tag":53,"props":2364,"children":2365},{"class":55,"line":110},[2366],{"type":14,"tag":53,"props":2367,"children":2368},{},[2369],{"type":20,"value":2370},"    start = time.time()\n",{"type":14,"tag":53,"props":2372,"children":2373},{"class":55,"line":119},[2374],{"type":14,"tag":53,"props":2375,"children":2376},{},[2377],{"type":20,"value":98},{"type":14,"tag":53,"props":2379,"children":2380},{"class":55,"line":219},[2381],{"type":14,"tag":53,"props":2382,"children":2383},{},[2384],{"type":20,"value":2162},{"type":14,"tag":53,"props":2386,"children":2387},{"class":55,"line":228},[2388],{"type":14,"tag":53,"props":2389,"children":2390},{},[2391],{"type":20,"value":2392},"        data = extract()\n",{"type":14,"tag":53,"props":2394,"children":2395},{"class":55,"line":236},[2396],{"type":14,"tag":53,"props":2397,"children":2398},{},[2399],{"type":20,"value":2400},"        metrics.gauge('extraction_rows', len(data))\n",{"type":14,"tag":53,"props":2402,"children":2403},{"class":55,"line":245},[2404],{"type":14,"tag":53,"props":2405,"children":2406},{},[2407],{"type":20,"value":2242},{"type":14,"tag":53,"props":2409,"children":2410},{"class":55,"line":254},[2411],{"type":14,"tag":53,"props":2412,"children":2413},{},[2414],{"type":20,"value":2415},"        clean = clean_data(data)\n",{"type":14,"tag":53,"props":2417,"children":2418},{"class":55,"line":263},[2419],{"type":14,"tag":53,"props":2420,"children":2421},{},[2422],{"type":20,"value":2423},"        metrics.gauge('rows_after_cleaning', len(clean))\n",{"type":14,"tag":53,"props":2425,"children":2426},{"class":55,"line":271},[2427],{"type":14,"tag":53,"props":2428,"children":2429},{},[2430],{"type":20,"value":2431},"        metrics.gauge('rows_removed', len(data) - len(clean))\n",{"type":14,"tag":53,"props":2433,"children":2434},{"class":55,"line":280},[2435],{"type":14,"tag":53,"props":2436,"children":2437},{},[2438],{"type":20,"value":2242},{"type":14,"tag":53,"props":2440,"children":2441},{"class":55,"line":289},[2442],{"type":14,"tag":53,"props":2443,"children":2444},{},[2445],{"type":20,"value":2446},"        validated = validate_quality(clean)\n",{"type":14,"tag":53,"props":2448,"children":2449},{"class":55,"line":298},[2450],{"type":14,"tag":53,"props":2451,"children":2452},{},[2453],{"type":20,"value":2454},"        metrics.counter('validation_passed', 1)\n",{"type":14,"tag":53,"props":2456,"children":2457},{"class":55,"line":307},[2458],{"type":14,"tag":53,"props":2459,"children":2460},{},[2461],{"type":20,"value":2242},{"type":14,"tag":53,"props":2463,"children":2464},{"class":55,"line":476},[2465],{"type":14,"tag":53,"props":2466,"children":2467},{},[2468],{"type":20,"value":2469},"        load_data(validated)\n",{"type":14,"tag":53,"props":2471,"children":2472},{"class":55,"line":485},[2473],{"type":14,"tag":53,"props":2474,"children":2475},{},[2476],{"type":20,"value":2477},"        metrics.counter('successful_loads', 1)\n",{"type":14,"tag":53,"props":2479,"children":2480},{"class":55,"line":494},[2481],{"type":14,"tag":53,"props":2482,"children":2483},{},[2484],{"type":20,"value":2242},{"type":14,"tag":53,"props":2486,"children":2487},{"class":55,"line":1155},[2488],{"type":14,"tag":53,"props":2489,"children":2490},{},[2491],{"type":20,"value":2178},{"type":14,"tag":53,"props":2493,"children":2494},{"class":55,"line":1163},[2495],{"type":14,"tag":53,"props":2496,"children":2497},{},[2498],{"type":20,"value":2499},"        metrics.counter('pipeline_failures', 1)\n",{"type":14,"tag":53,"props":2501,"children":2502},{"class":55,"line":1171},[2503],{"type":14,"tag":53,"props":2504,"children":2505},{},[2506],{"type":20,"value":2297},{"type":14,"tag":53,"props":2508,"children":2509},{"class":55,"line":1180},[2510],{"type":14,"tag":53,"props":2511,"children":2512},{},[2513],{"type":20,"value":98},{"type":14,"tag":53,"props":2515,"children":2516},{"class":55,"line":1189},[2517],{"type":14,"tag":53,"props":2518,"children":2519},{},[2520],{"type":20,"value":2521},"    finally:\n",{"type":14,"tag":53,"props":2523,"children":2524},{"class":55,"line":1198},[2525],{"type":14,"tag":53,"props":2526,"children":2527},{},[2528],{"type":20,"value":2529},"        duration = time.time() - start\n",{"type":14,"tag":53,"props":2531,"children":2532},{"class":55,"line":1207},[2533],{"type":14,"tag":53,"props":2534,"children":2535},{},[2536],{"type":20,"value":2537},"        metrics.histogram('pipeline_duration_seconds', duration)\n",{"type":14,"tag":53,"props":2539,"children":2540},{"class":55,"line":1822},[2541],{"type":14,"tag":53,"props":2542,"children":2543},{},[2544],{"type":20,"value":2545},"        print(f\"Pipeline took {duration:.1f} seconds\")\n",{"type":14,"tag":35,"props":2547,"children":2549},{"id":2548},"check-pipeline-status",[2550],{"type":20,"value":2551},"Check Pipeline Status",{"type":14,"tag":22,"props":2553,"children":2554},{},[2555],{"type":20,"value":2556},"In monitoring dashboard:",{"type":14,"tag":42,"props":2558,"children":2560},{"code":2559},"Daily Customer Pipeline:\n- Status: Running\n- Last run: 2024-06-12 09:15 UTC\n- Duration: 4 minutes 23 seconds\n- Rows extracted: 125,432\n- Rows cleaned: 124,998 (434 removed)\n- Validation: PASSED\n- Load: SUCCESS\n- Next run: Tomorrow 09:00 UTC\n",[2561],{"type":14,"tag":49,"props":2562,"children":2563},{"__ignoreMap":7},[2564],{"type":20,"value":2559},{"type":14,"tag":28,"props":2566,"children":2568},{"id":2567},"best-practices",[2569],{"type":20,"value":2570},"Best Practices",{"type":14,"tag":35,"props":2572,"children":2574},{"id":2573},"design-principles",[2575],{"type":20,"value":2576},"Design Principles",{"type":14,"tag":22,"props":2578,"children":2579},{},[2580,2582,2588,2590,2595,2597,2602,2604,2609,2611,2616],{"type":20,"value":2581},"✓ ",{"type":14,"tag":2583,"props":2584,"children":2585},"strong",{},[2586],{"type":20,"value":2587},"Idempotent",{"type":20,"value":2589},": Running twice gives same result\n✓ ",{"type":14,"tag":2583,"props":2591,"children":2592},{},[2593],{"type":20,"value":2594},"Atomic",{"type":20,"value":2596},": Either all succeeds or all fails\n✓ ",{"type":14,"tag":2583,"props":2598,"children":2599},{},[2600],{"type":20,"value":2601},"Observable",{"type":20,"value":2603},": You can see what happened\n✓ ",{"type":14,"tag":2583,"props":2605,"children":2606},{},[2607],{"type":20,"value":2608},"Resilient",{"type":20,"value":2610},": Handles failures gracefully\n✓ ",{"type":14,"tag":2583,"props":2612,"children":2613},{},[2614],{"type":20,"value":2615},"Scalable",{"type":20,"value":2617},": Handles growth over time",{"type":14,"tag":35,"props":2619,"children":2621},{"id":2620},"avoid-common-pitfalls",[2622],{"type":20,"value":2623},"Avoid Common Pitfalls",{"type":14,"tag":22,"props":2625,"children":2626},{},[2627,2629,2634,2636,2641],{"type":20,"value":2628},"❌ ",{"type":14,"tag":2583,"props":2630,"children":2631},{},[2632],{"type":20,"value":2633},"Don't",{"type":20,"value":2635}," hardcode paths and credentials\n✓ ",{"type":14,"tag":2583,"props":2637,"children":2638},{},[2639],{"type":20,"value":2640},"Do",{"type":20,"value":2642}," use configuration files and secrets",{"type":14,"tag":22,"props":2644,"children":2645},{},[2646,2647,2651,2653,2657],{"type":20,"value":2628},{"type":14,"tag":2583,"props":2648,"children":2649},{},[2650],{"type":20,"value":2633},{"type":20,"value":2652}," skip validation\n✓ ",{"type":14,"tag":2583,"props":2654,"children":2655},{},[2656],{"type":20,"value":2640},{"type":20,"value":2658}," validate at every step",{"type":14,"tag":22,"props":2660,"children":2661},{},[2662,2663,2667,2669,2673],{"type":20,"value":2628},{"type":14,"tag":2583,"props":2664,"children":2665},{},[2666],{"type":20,"value":2633},{"type":20,"value":2668}," assume source data is perfect\n✓ ",{"type":14,"tag":2583,"props":2670,"children":2671},{},[2672],{"type":20,"value":2640},{"type":20,"value":2674}," handle edge cases and errors",{"type":14,"tag":22,"props":2676,"children":2677},{},[2678,2679,2683,2685,2689],{"type":20,"value":2628},{"type":14,"tag":2583,"props":2680,"children":2681},{},[2682],{"type":20,"value":2633},{"type":20,"value":2684}," lose track of data lineage\n✓ ",{"type":14,"tag":2583,"props":2686,"children":2687},{},[2688],{"type":20,"value":2640},{"type":20,"value":2690}," document data flow and transformations",{"type":14,"tag":28,"props":2692,"children":2694},{"id":2693},"related-topics",[2695],{"type":20,"value":2696},"Related Topics",{"type":14,"tag":2698,"props":2699,"children":2700},"ul",{},[2701,2714,2725,2736],{"type":14,"tag":2702,"props":2703,"children":2704},"li",{},[2705,2712],{"type":14,"tag":2706,"props":2707,"children":2709},"a",{"href":2708},"\u002Fdocs\u002Ffeatures\u002Forchestration",[2710],{"type":20,"value":2711},"Orchestration",{"type":20,"value":2713}," - Schedule pipelines",{"type":14,"tag":2702,"props":2715,"children":2716},{},[2717,2723],{"type":14,"tag":2706,"props":2718,"children":2720},{"href":2719},"\u002Fdocs\u002Ffeatures\u002Flineage",[2721],{"type":20,"value":2722},"Lineage",{"type":20,"value":2724}," - Track data flow",{"type":14,"tag":2702,"props":2726,"children":2727},{},[2728,2734],{"type":14,"tag":2706,"props":2729,"children":2731},{"href":2730},"\u002Fdocs\u002Ffeatures\u002Fmetadata",[2732],{"type":20,"value":2733},"Metadata",{"type":20,"value":2735}," - Document data",{"type":14,"tag":2702,"props":2737,"children":2738},{},[2739,2745],{"type":14,"tag":2706,"props":2740,"children":2742},{"href":2741},"\u002Fdocs\u002Ffeatures\u002Fdata-workflow",[2743],{"type":20,"value":2744},"Data Workflow Guide",{"type":20,"value":2746}," - See big picture",{"type":14,"tag":2748,"props":2749,"children":2750},"style",{},[2751],{"type":20,"value":2752},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}",{"title":7,"searchDepth":65,"depth":65,"links":2754},[2755,2758,2762,2768,2772,2775,2779,2783,2787],{"id":30,"depth":65,"text":33,"children":2756},[2757],{"id":37,"depth":74,"text":40},{"id":133,"depth":65,"text":136,"children":2759},[2760,2761],{"id":139,"depth":74,"text":142},{"id":316,"depth":74,"text":319},{"id":503,"depth":65,"text":506,"children":2763},[2764,2765,2766,2767],{"id":509,"depth":74,"text":512},{"id":652,"depth":74,"text":655},{"id":788,"depth":74,"text":791},{"id":970,"depth":74,"text":973},{"id":1215,"depth":65,"text":1218,"children":2769},[2770,2771],{"id":1221,"depth":74,"text":1224},{"id":1381,"depth":74,"text":1384},{"id":1580,"depth":65,"text":1583,"children":2773},[2774],{"id":1586,"depth":74,"text":1589},{"id":1944,"depth":65,"text":1947,"children":2776},[2777,2778],{"id":1950,"depth":74,"text":1953},{"id":2121,"depth":74,"text":2124},{"id":2300,"depth":65,"text":2303,"children":2780},[2781,2782],{"id":2306,"depth":74,"text":2309},{"id":2548,"depth":74,"text":2551},{"id":2567,"depth":65,"text":2570,"children":2784},[2785,2786],{"id":2573,"depth":74,"text":2576},{"id":2620,"depth":74,"text":2623},{"id":2693,"depth":65,"text":2696},"markdown","content:docs:features:building-data-pipelines.md","content","docs\u002Ffeatures\u002Fbuilding-data-pipelines.md","docs\u002Ffeatures\u002Fbuilding-data-pipelines","md",1782233761740]