diff --git a/examples/homicide-dataset.csv b/examples/homicide-dataset.csv new file mode 100644 index 00000000..b49933c1 --- /dev/null +++ b/examples/homicide-dataset.csv @@ -0,0 +1,1309 @@ +index,"resp","race" +"1",0,"black" +"2",0,"black" +"3",0,"black" +"4",0,"black" +"5",0,"black" +"6",0,"black" +"7",0,"black" +"8",0,"black" +"9",0,"black" +"10",0,"black" +"11",0,"black" +"12",0,"black" +"13",0,"black" +"14",0,"black" +"15",0,"black" +"16",0,"black" +"17",0,"black" +"18",0,"black" +"19",0,"black" +"20",0,"black" +"21",0,"black" +"22",0,"black" +"23",0,"black" +"24",0,"black" +"25",0,"black" +"26",0,"black" +"27",0,"black" +"28",0,"black" +"29",0,"black" +"30",0,"black" +"31",0,"black" +"32",0,"black" +"33",0,"black" +"34",0,"black" +"35",0,"black" +"36",0,"black" +"37",0,"black" +"38",0,"black" +"39",0,"black" +"40",0,"black" +"41",0,"black" +"42",0,"black" +"43",0,"black" +"44",0,"black" +"45",0,"black" +"46",0,"black" +"47",0,"black" +"48",0,"black" +"49",0,"black" +"50",0,"black" +"51",0,"black" +"52",0,"black" +"53",0,"black" +"54",0,"black" +"55",0,"black" +"56",0,"black" +"57",0,"black" +"58",0,"black" +"59",0,"black" +"60",0,"black" +"61",0,"black" +"62",0,"black" +"63",0,"black" +"64",0,"black" +"65",0,"black" +"66",0,"black" +"67",0,"black" +"68",0,"black" +"69",0,"black" +"70",0,"black" +"71",0,"black" +"72",0,"black" +"73",0,"black" +"74",0,"black" +"75",0,"black" +"76",0,"black" +"77",0,"black" +"78",0,"black" +"79",0,"black" +"80",0,"black" +"81",0,"black" +"82",0,"black" +"83",0,"black" +"84",0,"black" +"85",0,"black" +"86",0,"black" +"87",0,"black" +"88",0,"black" +"89",0,"black" +"90",0,"black" +"91",0,"black" +"92",0,"black" +"93",0,"black" +"94",0,"black" +"95",0,"black" +"96",0,"black" +"97",0,"black" +"98",0,"black" +"99",0,"black" +"100",0,"black" +"101",0,"black" +"102",0,"black" +"103",0,"black" +"104",0,"black" +"105",0,"black" +"106",0,"black" +"107",0,"black" +"108",0,"black" +"109",0,"black" +"110",0,"black" +"111",0,"black" 
+"112",0,"black" +"113",0,"black" +"114",0,"black" +"115",0,"black" +"116",0,"black" +"117",0,"black" +"118",0,"black" +"119",0,"black" +"120",1,"black" +"121",1,"black" +"122",1,"black" +"123",1,"black" +"124",1,"black" +"125",1,"black" +"126",1,"black" +"127",1,"black" +"128",1,"black" +"129",1,"black" +"130",1,"black" +"131",1,"black" +"132",1,"black" +"133",1,"black" +"134",1,"black" +"135",1,"black" +"136",2,"black" +"137",2,"black" +"138",2,"black" +"139",2,"black" +"140",2,"black" +"141",2,"black" +"142",2,"black" +"143",2,"black" +"144",2,"black" +"145",2,"black" +"146",2,"black" +"147",2,"black" +"148",3,"black" +"149",3,"black" +"150",3,"black" +"151",3,"black" +"152",3,"black" +"153",3,"black" +"154",3,"black" +"155",4,"black" +"156",4,"black" +"157",4,"black" +"158",5,"black" +"159",5,"black" +"160",0,"white" +"161",0,"white" +"162",0,"white" +"163",0,"white" +"164",0,"white" +"165",0,"white" +"166",0,"white" +"167",0,"white" +"168",0,"white" +"169",0,"white" +"170",0,"white" +"171",0,"white" +"172",0,"white" +"173",0,"white" +"174",0,"white" +"175",0,"white" +"176",0,"white" +"177",0,"white" +"178",0,"white" +"179",0,"white" +"180",0,"white" +"181",0,"white" +"182",0,"white" +"183",0,"white" +"184",0,"white" +"185",0,"white" +"186",0,"white" +"187",0,"white" +"188",0,"white" +"189",0,"white" +"190",0,"white" +"191",0,"white" +"192",0,"white" +"193",0,"white" +"194",0,"white" +"195",0,"white" +"196",0,"white" +"197",0,"white" +"198",0,"white" +"199",0,"white" +"200",0,"white" +"201",0,"white" +"202",0,"white" +"203",0,"white" +"204",0,"white" +"205",0,"white" +"206",0,"white" +"207",0,"white" +"208",0,"white" +"209",0,"white" +"210",0,"white" +"211",0,"white" +"212",0,"white" +"213",0,"white" +"214",0,"white" +"215",0,"white" +"216",0,"white" +"217",0,"white" +"218",0,"white" +"219",0,"white" +"220",0,"white" +"221",0,"white" +"222",0,"white" +"223",0,"white" +"224",0,"white" +"225",0,"white" +"226",0,"white" +"227",0,"white" +"228",0,"white" 
+"229",0,"white" +"230",0,"white" +"231",0,"white" +"232",0,"white" +"233",0,"white" +"234",0,"white" +"235",0,"white" +"236",0,"white" +"237",0,"white" +"238",0,"white" +"239",0,"white" +"240",0,"white" +"241",0,"white" +"242",0,"white" +"243",0,"white" +"244",0,"white" +"245",0,"white" +"246",0,"white" +"247",0,"white" +"248",0,"white" +"249",0,"white" +"250",0,"white" +"251",0,"white" +"252",0,"white" +"253",0,"white" +"254",0,"white" +"255",0,"white" +"256",0,"white" +"257",0,"white" +"258",0,"white" +"259",0,"white" +"260",0,"white" +"261",0,"white" +"262",0,"white" +"263",0,"white" +"264",0,"white" +"265",0,"white" +"266",0,"white" +"267",0,"white" +"268",0,"white" +"269",0,"white" +"270",0,"white" +"271",0,"white" +"272",0,"white" +"273",0,"white" +"274",0,"white" +"275",0,"white" +"276",0,"white" +"277",0,"white" +"278",0,"white" +"279",0,"white" +"280",0,"white" +"281",0,"white" +"282",0,"white" +"283",0,"white" +"284",0,"white" +"285",0,"white" +"286",0,"white" +"287",0,"white" +"288",0,"white" +"289",0,"white" +"290",0,"white" +"291",0,"white" +"292",0,"white" +"293",0,"white" +"294",0,"white" +"295",0,"white" +"296",0,"white" +"297",0,"white" +"298",0,"white" +"299",0,"white" +"300",0,"white" +"301",0,"white" +"302",0,"white" +"303",0,"white" +"304",0,"white" +"305",0,"white" +"306",0,"white" +"307",0,"white" +"308",0,"white" +"309",0,"white" +"310",0,"white" +"311",0,"white" +"312",0,"white" +"313",0,"white" +"314",0,"white" +"315",0,"white" +"316",0,"white" +"317",0,"white" +"318",0,"white" +"319",0,"white" +"320",0,"white" +"321",0,"white" +"322",0,"white" +"323",0,"white" +"324",0,"white" +"325",0,"white" +"326",0,"white" +"327",0,"white" +"328",0,"white" +"329",0,"white" +"330",0,"white" +"331",0,"white" +"332",0,"white" +"333",0,"white" +"334",0,"white" +"335",0,"white" +"336",0,"white" +"337",0,"white" +"338",0,"white" +"339",0,"white" +"340",0,"white" +"341",0,"white" +"342",0,"white" +"343",0,"white" +"344",0,"white" +"345",0,"white" 
+"346",0,"white" +"347",0,"white" +"348",0,"white" +"349",0,"white" +"350",0,"white" +"351",0,"white" +"352",0,"white" +"353",0,"white" +"354",0,"white" +"355",0,"white" +"356",0,"white" +"357",0,"white" +"358",0,"white" +"359",0,"white" +"360",0,"white" +"361",0,"white" +"362",0,"white" +"363",0,"white" +"364",0,"white" +"365",0,"white" +"366",0,"white" +"367",0,"white" +"368",0,"white" +"369",0,"white" +"370",0,"white" +"371",0,"white" +"372",0,"white" +"373",0,"white" +"374",0,"white" +"375",0,"white" +"376",0,"white" +"377",0,"white" +"378",0,"white" +"379",0,"white" +"380",0,"white" +"381",0,"white" +"382",0,"white" +"383",0,"white" +"384",0,"white" +"385",0,"white" +"386",0,"white" +"387",0,"white" +"388",0,"white" +"389",0,"white" +"390",0,"white" +"391",0,"white" +"392",0,"white" +"393",0,"white" +"394",0,"white" +"395",0,"white" +"396",0,"white" +"397",0,"white" +"398",0,"white" +"399",0,"white" +"400",0,"white" +"401",0,"white" +"402",0,"white" +"403",0,"white" +"404",0,"white" +"405",0,"white" +"406",0,"white" +"407",0,"white" +"408",0,"white" +"409",0,"white" +"410",0,"white" +"411",0,"white" +"412",0,"white" +"413",0,"white" +"414",0,"white" +"415",0,"white" +"416",0,"white" +"417",0,"white" +"418",0,"white" +"419",0,"white" +"420",0,"white" +"421",0,"white" +"422",0,"white" +"423",0,"white" +"424",0,"white" +"425",0,"white" +"426",0,"white" +"427",0,"white" +"428",0,"white" +"429",0,"white" +"430",0,"white" +"431",0,"white" +"432",0,"white" +"433",0,"white" +"434",0,"white" +"435",0,"white" +"436",0,"white" +"437",0,"white" +"438",0,"white" +"439",0,"white" +"440",0,"white" +"441",0,"white" +"442",0,"white" +"443",0,"white" +"444",0,"white" +"445",0,"white" +"446",0,"white" +"447",0,"white" +"448",0,"white" +"449",0,"white" +"450",0,"white" +"451",0,"white" +"452",0,"white" +"453",0,"white" +"454",0,"white" +"455",0,"white" +"456",0,"white" +"457",0,"white" +"458",0,"white" +"459",0,"white" +"460",0,"white" +"461",0,"white" +"462",0,"white" 
+"463",0,"white" +"464",0,"white" +"465",0,"white" +"466",0,"white" +"467",0,"white" +"468",0,"white" +"469",0,"white" +"470",0,"white" +"471",0,"white" +"472",0,"white" +"473",0,"white" +"474",0,"white" +"475",0,"white" +"476",0,"white" +"477",0,"white" +"478",0,"white" +"479",0,"white" +"480",0,"white" +"481",0,"white" +"482",0,"white" +"483",0,"white" +"484",0,"white" +"485",0,"white" +"486",0,"white" +"487",0,"white" +"488",0,"white" +"489",0,"white" +"490",0,"white" +"491",0,"white" +"492",0,"white" +"493",0,"white" +"494",0,"white" +"495",0,"white" +"496",0,"white" +"497",0,"white" +"498",0,"white" +"499",0,"white" +"500",0,"white" +"501",0,"white" +"502",0,"white" +"503",0,"white" +"504",0,"white" +"505",0,"white" +"506",0,"white" +"507",0,"white" +"508",0,"white" +"509",0,"white" +"510",0,"white" +"511",0,"white" +"512",0,"white" +"513",0,"white" +"514",0,"white" +"515",0,"white" +"516",0,"white" +"517",0,"white" +"518",0,"white" +"519",0,"white" +"520",0,"white" +"521",0,"white" +"522",0,"white" +"523",0,"white" +"524",0,"white" +"525",0,"white" +"526",0,"white" +"527",0,"white" +"528",0,"white" +"529",0,"white" +"530",0,"white" +"531",0,"white" +"532",0,"white" +"533",0,"white" +"534",0,"white" +"535",0,"white" +"536",0,"white" +"537",0,"white" +"538",0,"white" +"539",0,"white" +"540",0,"white" +"541",0,"white" +"542",0,"white" +"543",0,"white" +"544",0,"white" +"545",0,"white" +"546",0,"white" +"547",0,"white" +"548",0,"white" +"549",0,"white" +"550",0,"white" +"551",0,"white" +"552",0,"white" +"553",0,"white" +"554",0,"white" +"555",0,"white" +"556",0,"white" +"557",0,"white" +"558",0,"white" +"559",0,"white" +"560",0,"white" +"561",0,"white" +"562",0,"white" +"563",0,"white" +"564",0,"white" +"565",0,"white" +"566",0,"white" +"567",0,"white" +"568",0,"white" +"569",0,"white" +"570",0,"white" +"571",0,"white" +"572",0,"white" +"573",0,"white" +"574",0,"white" +"575",0,"white" +"576",0,"white" +"577",0,"white" +"578",0,"white" +"579",0,"white" 
+"580",0,"white" +"581",0,"white" +"582",0,"white" +"583",0,"white" +"584",0,"white" +"585",0,"white" +"586",0,"white" +"587",0,"white" +"588",0,"white" +"589",0,"white" +"590",0,"white" +"591",0,"white" +"592",0,"white" +"593",0,"white" +"594",0,"white" +"595",0,"white" +"596",0,"white" +"597",0,"white" +"598",0,"white" +"599",0,"white" +"600",0,"white" +"601",0,"white" +"602",0,"white" +"603",0,"white" +"604",0,"white" +"605",0,"white" +"606",0,"white" +"607",0,"white" +"608",0,"white" +"609",0,"white" +"610",0,"white" +"611",0,"white" +"612",0,"white" +"613",0,"white" +"614",0,"white" +"615",0,"white" +"616",0,"white" +"617",0,"white" +"618",0,"white" +"619",0,"white" +"620",0,"white" +"621",0,"white" +"622",0,"white" +"623",0,"white" +"624",0,"white" +"625",0,"white" +"626",0,"white" +"627",0,"white" +"628",0,"white" +"629",0,"white" +"630",0,"white" +"631",0,"white" +"632",0,"white" +"633",0,"white" +"634",0,"white" +"635",0,"white" +"636",0,"white" +"637",0,"white" +"638",0,"white" +"639",0,"white" +"640",0,"white" +"641",0,"white" +"642",0,"white" +"643",0,"white" +"644",0,"white" +"645",0,"white" +"646",0,"white" +"647",0,"white" +"648",0,"white" +"649",0,"white" +"650",0,"white" +"651",0,"white" +"652",0,"white" +"653",0,"white" +"654",0,"white" +"655",0,"white" +"656",0,"white" +"657",0,"white" +"658",0,"white" +"659",0,"white" +"660",0,"white" +"661",0,"white" +"662",0,"white" +"663",0,"white" +"664",0,"white" +"665",0,"white" +"666",0,"white" +"667",0,"white" +"668",0,"white" +"669",0,"white" +"670",0,"white" +"671",0,"white" +"672",0,"white" +"673",0,"white" +"674",0,"white" +"675",0,"white" +"676",0,"white" +"677",0,"white" +"678",0,"white" +"679",0,"white" +"680",0,"white" +"681",0,"white" +"682",0,"white" +"683",0,"white" +"684",0,"white" +"685",0,"white" +"686",0,"white" +"687",0,"white" +"688",0,"white" +"689",0,"white" +"690",0,"white" +"691",0,"white" +"692",0,"white" +"693",0,"white" +"694",0,"white" +"695",0,"white" +"696",0,"white" 
+"697",0,"white" +"698",0,"white" +"699",0,"white" +"700",0,"white" +"701",0,"white" +"702",0,"white" +"703",0,"white" +"704",0,"white" +"705",0,"white" +"706",0,"white" +"707",0,"white" +"708",0,"white" +"709",0,"white" +"710",0,"white" +"711",0,"white" +"712",0,"white" +"713",0,"white" +"714",0,"white" +"715",0,"white" +"716",0,"white" +"717",0,"white" +"718",0,"white" +"719",0,"white" +"720",0,"white" +"721",0,"white" +"722",0,"white" +"723",0,"white" +"724",0,"white" +"725",0,"white" +"726",0,"white" +"727",0,"white" +"728",0,"white" +"729",0,"white" +"730",0,"white" +"731",0,"white" +"732",0,"white" +"733",0,"white" +"734",0,"white" +"735",0,"white" +"736",0,"white" +"737",0,"white" +"738",0,"white" +"739",0,"white" +"740",0,"white" +"741",0,"white" +"742",0,"white" +"743",0,"white" +"744",0,"white" +"745",0,"white" +"746",0,"white" +"747",0,"white" +"748",0,"white" +"749",0,"white" +"750",0,"white" +"751",0,"white" +"752",0,"white" +"753",0,"white" +"754",0,"white" +"755",0,"white" +"756",0,"white" +"757",0,"white" +"758",0,"white" +"759",0,"white" +"760",0,"white" +"761",0,"white" +"762",0,"white" +"763",0,"white" +"764",0,"white" +"765",0,"white" +"766",0,"white" +"767",0,"white" +"768",0,"white" +"769",0,"white" +"770",0,"white" +"771",0,"white" +"772",0,"white" +"773",0,"white" +"774",0,"white" +"775",0,"white" +"776",0,"white" +"777",0,"white" +"778",0,"white" +"779",0,"white" +"780",0,"white" +"781",0,"white" +"782",0,"white" +"783",0,"white" +"784",0,"white" +"785",0,"white" +"786",0,"white" +"787",0,"white" +"788",0,"white" +"789",0,"white" +"790",0,"white" +"791",0,"white" +"792",0,"white" +"793",0,"white" +"794",0,"white" +"795",0,"white" +"796",0,"white" +"797",0,"white" +"798",0,"white" +"799",0,"white" +"800",0,"white" +"801",0,"white" +"802",0,"white" +"803",0,"white" +"804",0,"white" +"805",0,"white" +"806",0,"white" +"807",0,"white" +"808",0,"white" +"809",0,"white" +"810",0,"white" +"811",0,"white" +"812",0,"white" +"813",0,"white" 
+"814",0,"white" +"815",0,"white" +"816",0,"white" +"817",0,"white" +"818",0,"white" +"819",0,"white" +"820",0,"white" +"821",0,"white" +"822",0,"white" +"823",0,"white" +"824",0,"white" +"825",0,"white" +"826",0,"white" +"827",0,"white" +"828",0,"white" +"829",0,"white" +"830",0,"white" +"831",0,"white" +"832",0,"white" +"833",0,"white" +"834",0,"white" +"835",0,"white" +"836",0,"white" +"837",0,"white" +"838",0,"white" +"839",0,"white" +"840",0,"white" +"841",0,"white" +"842",0,"white" +"843",0,"white" +"844",0,"white" +"845",0,"white" +"846",0,"white" +"847",0,"white" +"848",0,"white" +"849",0,"white" +"850",0,"white" +"851",0,"white" +"852",0,"white" +"853",0,"white" +"854",0,"white" +"855",0,"white" +"856",0,"white" +"857",0,"white" +"858",0,"white" +"859",0,"white" +"860",0,"white" +"861",0,"white" +"862",0,"white" +"863",0,"white" +"864",0,"white" +"865",0,"white" +"866",0,"white" +"867",0,"white" +"868",0,"white" +"869",0,"white" +"870",0,"white" +"871",0,"white" +"872",0,"white" +"873",0,"white" +"874",0,"white" +"875",0,"white" +"876",0,"white" +"877",0,"white" +"878",0,"white" +"879",0,"white" +"880",0,"white" +"881",0,"white" +"882",0,"white" +"883",0,"white" +"884",0,"white" +"885",0,"white" +"886",0,"white" +"887",0,"white" +"888",0,"white" +"889",0,"white" +"890",0,"white" +"891",0,"white" +"892",0,"white" +"893",0,"white" +"894",0,"white" +"895",0,"white" +"896",0,"white" +"897",0,"white" +"898",0,"white" +"899",0,"white" +"900",0,"white" +"901",0,"white" +"902",0,"white" +"903",0,"white" +"904",0,"white" +"905",0,"white" +"906",0,"white" +"907",0,"white" +"908",0,"white" +"909",0,"white" +"910",0,"white" +"911",0,"white" +"912",0,"white" +"913",0,"white" +"914",0,"white" +"915",0,"white" +"916",0,"white" +"917",0,"white" +"918",0,"white" +"919",0,"white" +"920",0,"white" +"921",0,"white" +"922",0,"white" +"923",0,"white" +"924",0,"white" +"925",0,"white" +"926",0,"white" +"927",0,"white" +"928",0,"white" +"929",0,"white" +"930",0,"white" 
+"931",0,"white" +"932",0,"white" +"933",0,"white" +"934",0,"white" +"935",0,"white" +"936",0,"white" +"937",0,"white" +"938",0,"white" +"939",0,"white" +"940",0,"white" +"941",0,"white" +"942",0,"white" +"943",0,"white" +"944",0,"white" +"945",0,"white" +"946",0,"white" +"947",0,"white" +"948",0,"white" +"949",0,"white" +"950",0,"white" +"951",0,"white" +"952",0,"white" +"953",0,"white" +"954",0,"white" +"955",0,"white" +"956",0,"white" +"957",0,"white" +"958",0,"white" +"959",0,"white" +"960",0,"white" +"961",0,"white" +"962",0,"white" +"963",0,"white" +"964",0,"white" +"965",0,"white" +"966",0,"white" +"967",0,"white" +"968",0,"white" +"969",0,"white" +"970",0,"white" +"971",0,"white" +"972",0,"white" +"973",0,"white" +"974",0,"white" +"975",0,"white" +"976",0,"white" +"977",0,"white" +"978",0,"white" +"979",0,"white" +"980",0,"white" +"981",0,"white" +"982",0,"white" +"983",0,"white" +"984",0,"white" +"985",0,"white" +"986",0,"white" +"987",0,"white" +"988",0,"white" +"989",0,"white" +"990",0,"white" +"991",0,"white" +"992",0,"white" +"993",0,"white" +"994",0,"white" +"995",0,"white" +"996",0,"white" +"997",0,"white" +"998",0,"white" +"999",0,"white" +"1000",0,"white" +"1001",0,"white" +"1002",0,"white" +"1003",0,"white" +"1004",0,"white" +"1005",0,"white" +"1006",0,"white" +"1007",0,"white" +"1008",0,"white" +"1009",0,"white" +"1010",0,"white" +"1011",0,"white" +"1012",0,"white" +"1013",0,"white" +"1014",0,"white" +"1015",0,"white" +"1016",0,"white" +"1017",0,"white" +"1018",0,"white" +"1019",0,"white" +"1020",0,"white" +"1021",0,"white" +"1022",0,"white" +"1023",0,"white" +"1024",0,"white" +"1025",0,"white" +"1026",0,"white" +"1027",0,"white" +"1028",0,"white" +"1029",0,"white" +"1030",0,"white" +"1031",0,"white" +"1032",0,"white" +"1033",0,"white" +"1034",0,"white" +"1035",0,"white" +"1036",0,"white" +"1037",0,"white" +"1038",0,"white" +"1039",0,"white" +"1040",0,"white" +"1041",0,"white" +"1042",0,"white" +"1043",0,"white" +"1044",0,"white" 
+"1045",0,"white" +"1046",0,"white" +"1047",0,"white" +"1048",0,"white" +"1049",0,"white" +"1050",0,"white" +"1051",0,"white" +"1052",0,"white" +"1053",0,"white" +"1054",0,"white" +"1055",0,"white" +"1056",0,"white" +"1057",0,"white" +"1058",0,"white" +"1059",0,"white" +"1060",0,"white" +"1061",0,"white" +"1062",0,"white" +"1063",0,"white" +"1064",0,"white" +"1065",0,"white" +"1066",0,"white" +"1067",0,"white" +"1068",0,"white" +"1069",0,"white" +"1070",0,"white" +"1071",0,"white" +"1072",0,"white" +"1073",0,"white" +"1074",0,"white" +"1075",0,"white" +"1076",0,"white" +"1077",0,"white" +"1078",0,"white" +"1079",0,"white" +"1080",0,"white" +"1081",0,"white" +"1082",0,"white" +"1083",0,"white" +"1084",0,"white" +"1085",0,"white" +"1086",0,"white" +"1087",0,"white" +"1088",0,"white" +"1089",0,"white" +"1090",0,"white" +"1091",0,"white" +"1092",0,"white" +"1093",0,"white" +"1094",0,"white" +"1095",0,"white" +"1096",0,"white" +"1097",0,"white" +"1098",0,"white" +"1099",0,"white" +"1100",0,"white" +"1101",0,"white" +"1102",0,"white" +"1103",0,"white" +"1104",0,"white" +"1105",0,"white" +"1106",0,"white" +"1107",0,"white" +"1108",0,"white" +"1109",0,"white" +"1110",0,"white" +"1111",0,"white" +"1112",0,"white" +"1113",0,"white" +"1114",0,"white" +"1115",0,"white" +"1116",0,"white" +"1117",0,"white" +"1118",0,"white" +"1119",0,"white" +"1120",0,"white" +"1121",0,"white" +"1122",0,"white" +"1123",0,"white" +"1124",0,"white" +"1125",0,"white" +"1126",0,"white" +"1127",0,"white" +"1128",0,"white" +"1129",0,"white" +"1130",0,"white" +"1131",0,"white" +"1132",0,"white" +"1133",0,"white" +"1134",0,"white" +"1135",0,"white" +"1136",0,"white" +"1137",0,"white" +"1138",0,"white" +"1139",0,"white" +"1140",0,"white" +"1141",0,"white" +"1142",0,"white" +"1143",0,"white" +"1144",0,"white" +"1145",0,"white" +"1146",0,"white" +"1147",0,"white" +"1148",0,"white" +"1149",0,"white" +"1150",0,"white" +"1151",0,"white" +"1152",0,"white" +"1153",0,"white" +"1154",0,"white" +"1155",0,"white" 
+"1156",0,"white" +"1157",0,"white" +"1158",0,"white" +"1159",0,"white" +"1160",0,"white" +"1161",0,"white" +"1162",0,"white" +"1163",0,"white" +"1164",0,"white" +"1165",0,"white" +"1166",0,"white" +"1167",0,"white" +"1168",0,"white" +"1169",0,"white" +"1170",0,"white" +"1171",0,"white" +"1172",0,"white" +"1173",0,"white" +"1174",0,"white" +"1175",0,"white" +"1176",0,"white" +"1177",0,"white" +"1178",0,"white" +"1179",0,"white" +"1180",0,"white" +"1181",0,"white" +"1182",0,"white" +"1183",0,"white" +"1184",0,"white" +"1185",0,"white" +"1186",0,"white" +"1187",0,"white" +"1188",0,"white" +"1189",0,"white" +"1190",0,"white" +"1191",0,"white" +"1192",0,"white" +"1193",0,"white" +"1194",0,"white" +"1195",0,"white" +"1196",0,"white" +"1197",0,"white" +"1198",0,"white" +"1199",0,"white" +"1200",0,"white" +"1201",0,"white" +"1202",0,"white" +"1203",0,"white" +"1204",0,"white" +"1205",0,"white" +"1206",0,"white" +"1207",0,"white" +"1208",0,"white" +"1209",0,"white" +"1210",0,"white" +"1211",0,"white" +"1212",0,"white" +"1213",0,"white" +"1214",0,"white" +"1215",0,"white" +"1216",0,"white" +"1217",0,"white" +"1218",0,"white" +"1219",0,"white" +"1220",0,"white" +"1221",0,"white" +"1222",0,"white" +"1223",0,"white" +"1224",0,"white" +"1225",0,"white" +"1226",0,"white" +"1227",0,"white" +"1228",0,"white" +"1229",0,"white" +"1230",1,"white" +"1231",1,"white" +"1232",1,"white" +"1233",1,"white" +"1234",1,"white" +"1235",1,"white" +"1236",1,"white" +"1237",1,"white" +"1238",1,"white" +"1239",1,"white" +"1240",1,"white" +"1241",1,"white" +"1242",1,"white" +"1243",1,"white" +"1244",1,"white" +"1245",1,"white" +"1246",1,"white" +"1247",1,"white" +"1248",1,"white" +"1249",1,"white" +"1250",1,"white" +"1251",1,"white" +"1252",1,"white" +"1253",1,"white" +"1254",1,"white" +"1255",1,"white" +"1256",1,"white" +"1257",1,"white" +"1258",1,"white" +"1259",1,"white" +"1260",1,"white" +"1261",1,"white" +"1262",1,"white" +"1263",1,"white" +"1264",1,"white" +"1265",1,"white" +"1266",1,"white" 
+"1267",1,"white" +"1268",1,"white" +"1269",1,"white" +"1270",1,"white" +"1271",1,"white" +"1272",1,"white" +"1273",1,"white" +"1274",1,"white" +"1275",1,"white" +"1276",1,"white" +"1277",1,"white" +"1278",1,"white" +"1279",1,"white" +"1280",1,"white" +"1281",1,"white" +"1282",1,"white" +"1283",1,"white" +"1284",1,"white" +"1285",1,"white" +"1286",1,"white" +"1287",1,"white" +"1288",1,"white" +"1289",1,"white" +"1290",2,"white" +"1291",2,"white" +"1292",2,"white" +"1293",2,"white" +"1294",2,"white" +"1295",2,"white" +"1296",2,"white" +"1297",2,"white" +"1298",2,"white" +"1299",2,"white" +"1300",2,"white" +"1301",2,"white" +"1302",2,"white" +"1303",2,"white" +"1304",3,"white" +"1305",3,"white" +"1306",3,"white" +"1307",3,"white" +"1308",6,"white" diff --git a/examples/plot_neg_binomial_homicide_victims.py b/examples/plot_neg_binomial_homicide_victims.py new file mode 100644 index 00000000..e24ed363 --- /dev/null +++ b/examples/plot_neg_binomial_homicide_victims.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +""" +======================================= +Number of Homicide victims +======================================= + +This is an example of GLM with negative binomial distribution. +We wrote this example taking inspiration from the R community +below +https://data.library.virginia.edu/getting-started-with-negative-binomial-regression-modeling/ + +The data used are taken from a survey which asked people +how many homicide victims they knew. The variables are "resp" and "race". +The former indicates how many victims the respondent knew, the latter +the ethnic group of the respondent (black or white). + +The nature of the empirical data suggests that we need to model +count data (the number of homicide victims). In such scenarios, +a common model we could use is the Poisson regression. + +However, if we inspect the dataset more closely, we will notice that +the dataset is over-dispersed since the conditional variance exceeds the +conditional mean. 
Basically, for each race (black or white), +the variance is double the mean. + +We would need to apply another model which is the Negative Binomial regression. + +In this example, we will see how the Negative Binomial model will produce better +results thanks to its dispersion parameter. + +""" + +######################################################## +# Author: Giovanni De Toni +# License: MIT +######################################################## + +######################################################## +# Import libraries + +import pandas as pd +import numpy as np +from pyglmnet import GLM + +import matplotlib.pyplot as plt + +######################################################## +# Read and preprocess data +df = pd.read_csv("./homicide-dataset.csv")[['resp', 'race']] + +######################################################## +# Histogram of the number of known homicide victims for each race +df.hist(column='resp', by=['race']) +plt.show() + +# Print mean and standard deviation of the responses for each race. +# We can see from here that the variance is higher than the mean for both +# groups, therefore hinting at over-dispersion. 
+prog_mean = df.groupby('race').agg({'resp': ['mean', 'std']}) +print(prog_mean) + +######################################################## +# Features +# Model the race as a binary categorical feature (codes follow category order: black=0, white=1) +df.race = pd.Categorical(df.race) +df["race_code"] = df.race.cat.codes + +Xdsgn = df.drop(['race', 'resp'], axis=1) +y = df['resp'].values + +######################################################## +# Fit the model using the Negative Binomial +glm_nb = GLM(distr='neg-binomial', + alpha=0.0, + reg_lambda=0.0, + score_metric='pseudo_R2', + verbose=True, + learning_rate=1e-1, + max_iter=5000, + theta=0.20) +glm_nb.fit(Xdsgn, y) +print(glm_nb.beta0_, glm_nb.beta_) + +######################################################## +# Fit the model using the Poisson regression instead +glm_poisson = GLM(distr='poisson', + alpha=0.0, + reg_lambda=0.0, + score_metric='pseudo_R2', + verbose=True, + max_iter=5000, + learning_rate=1e-1) +glm_poisson.fit(Xdsgn, y) +print(glm_poisson.beta0_, glm_poisson.beta_) + +######################################################## +# Plot convergence information for both negative binomial and poisson +glm_nb.plot_convergence() +glm_poisson.plot_convergence() +plt.show() + +######################################################## +# Simulate the prediction given new data +# +# The Poisson model outputs the predicted mean (and therefore variance) of the distribution. +# However, we can see from the exploratory analysis that the observed standard deviations are much larger. +# The Negative Binomial generates the same mean, but we can use the dispersion parameter to +# compute a more accurate estimate of the standard deviations (for both white and black classes). 
+# The NB standard deviation below is sqrt(mu + mu**2 / theta), with theta = 0.20 (the value the model was fit with). +pred_nb = np.array(glm_nb.predict([[0], [1]])) +pred_poisson = np.array(glm_poisson.predict([[0], [1]])) +print("") +print("NB Predicted means+std (black/white): {}, {}".format(pred_nb, np.sqrt(pred_nb+(pred_nb**2)*1/0.20))) +print("Poisson Predicted means+std (black/white): {}, {}".format(pred_poisson, np.sqrt(pred_poisson))) \ No newline at end of file diff --git a/examples/plot_negative_binomial.py b/examples/plot_negative_binomial.py new file mode 100644 index 00000000..93d95047 --- /dev/null +++ b/examples/plot_negative_binomial.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +""" +======================================= +GLM with Negative Binomial Distribution +======================================= + +This is an example of GLM with negative binomial distribution. +We wrote this example taking inspiration from the R community +below +https://stats.idre.ucla.edu/r/dae/negative-binomial-regression/ + +Here, we would like to predict the number of days of absence of high school +juniors at two schools from the type of program they are enrolled in, +and their math score. + +The nature of the empirical data suggests that we need to model +count data (the number of days absent). In such scenarios, a common model +we could use is the Poisson regression. + +However, if we inspect the dataset more closely, we will notice that +the dataset is over-dispersed since the conditional variance exceeds the +conditional mean. We would need to apply another model which is the +Negative Binomial regression. + +The Negative Binomial regression can be seen as a mixture of Poisson +regression in which the mean of the Poisson distribution can be seen +as a random variable drawn from a Gamma distribution. + +This gives us an extra parameter which can be used to account for the +over-dispersion. + +In this example, we will apply both Negative Binomial regression and +Poisson regression on the dataset. 
+""" + +######################################################## +# Author: Titipat Achakulvisut +# Giovanni De Toni +# License: MIT +######################################################## + +######################################################## +# Import relevant libraries + +import pandas as pd +from pyglmnet import GLM + +import matplotlib.pyplot as plt + +######################################################## +# Read and preprocess data +df = pd.read_stata("https://stats.idre.ucla.edu/stat/stata/dae/nb_data.dta") + +######################################################## +# Map the numeric program codes to descriptive labels (assigned back, since an in-place replace on df['prog'] may act on a copy) +df['prog'] = df['prog'].replace({1: "General", 2: "Academic", 3: "Vocational"}) + +######################################################## +# Histogram of the number of days absent for each type of program +df.hist(column='daysabs', by=['prog']) +plt.show() + +# Print mean and standard deviation for each program enrolled. +# We can see from here that the variance is higher than the mean for all +# the levels, therefore hinting at over-dispersion. 
+prog_mean = df.groupby('prog').agg({'daysabs': ['mean', 'std']}) +print(prog_mean) + +######################################################## +# Features and target +X = df.drop('daysabs', axis=1) +y = df['daysabs'].values + +######################################################## +# Design matrix: math score plus float dummy columns for the program (Vocational is dropped as the reference level) +program_df = pd.get_dummies(df.prog, dtype=float) +program_df_cleaned = program_df.drop('Vocational', axis=1)[["General", "Academic"]] +Xdsgn = pd.concat((df['math'], program_df_cleaned), axis=1).values + +######################################################## +# Fit the model using the Negative Binomial GLM +glm_nb = GLM(distr='neg-binomial', + alpha=0.0, + reg_lambda=0.0, + score_metric='pseudo_R2', + verbose=True, + learning_rate=1e-6, + theta=1.032713156) +glm_nb.fit(Xdsgn, y) +print(glm_nb.beta0_, glm_nb.beta_) + +######################################################## +# Fit the model using the Poisson regression instead +glm_poisson = GLM(distr='poisson', + alpha=0.0, + reg_lambda=0.0, + score_metric='pseudo_R2', + verbose=True, + learning_rate=1e-6) +glm_poisson.fit(Xdsgn, y) +print(glm_poisson.beta0_, glm_poisson.beta_) + + +######################################################## +# Plot convergence information for both negative binomial and poisson +glm_nb.plot_convergence() +glm_poisson.plot_convergence() +plt.show() \ No newline at end of file