Pattern Causality in a large dataset
Stavros Stavroglou, Athanasios Pantelous, Hui Wang
Source:vignettes/matrix.Rmd
matrix.Rmd
To uncover hidden causality in a large and complex system that includes many series, the package also provides functions to compute and display it more clearly.
Pattern causality matrix
The DJS data includes 29 stock price series, which is large enough for our analysis.
library(patterncausality)
data(DJS)
#head(DJS)
We can then estimate the pattern causality in this dataset with the
pcMatrix
function.
dataset <- DJS[,-1] # remove the date column
result <- pcMatrix(dataset, E = 3, tau = 1, metric = "euclidean", h = 2, weighted = TRUE)
The result contains three matrices (positive, negative and dark); the first rows of the positive matrix are shown below.
head(result$positive)
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] NA 0.4464555 0.3686338 0.4334828 0.4497469 0.4185127
#> [2,] 0.4135754 NA 0.3552519 0.4511914 0.4284627 0.3827940
#> [3,] 0.3920266 0.4120172 NA 0.3998358 0.4099279 0.3393939
#> [4,] 0.4114420 0.4368088 0.3821340 NA 0.4387443 0.3498717
#> [5,] 0.4311008 0.4414003 0.4054487 0.4507148 NA 0.4685803
#> [6,] 0.3847981 0.3716814 0.3782051 0.3813694 0.4300077 NA
#> [,7] [,8] [,9] [,10] [,11] [,12]
#> [1,] 0.4137931 0.3973064 0.4672897 0.3859504 0.4559156 0.4193548
#> [2,] 0.4149026 0.3819320 0.4179343 0.3576105 0.4190476 0.4506627
#> [3,] 0.3786247 0.2782609 0.3852107 0.3427386 0.3776824 0.3878357
#> [4,] 0.4031852 0.3473590 0.4079602 0.3453947 0.4336283 0.4123879
#> [5,] 0.4017642 0.3725000 0.4882353 0.4197719 0.4362205 0.4740061
#> [6,] 0.3394415 0.3746702 0.3904221 0.5105802 0.3807947 0.4021824
#> [,13] [,14] [,15] [,16] [,17] [,18]
#> [1,] 0.4021036 0.4080505 0.3995253 0.4121014 0.4221840 0.3377897
#> [2,] 0.4022169 0.4017642 0.3537937 0.4459459 0.3902439 0.3885516
#> [3,] 0.3935644 0.3949247 0.3049956 0.3900245 0.3573854 0.2811355
#> [4,] 0.3575655 0.3755069 0.3682403 0.3904221 0.3144816 0.3555556
#> [5,] 0.4344718 0.4030327 0.3213028 0.4351563 0.3818636 0.3592073
#> [6,] 0.3965658 0.3848168 0.3670782 0.3573123 0.3670569 0.3561888
#> [,19] [,20] [,21] [,22] [,23] [,24]
#> [1,] 0.3647364 0.4228243 0.3413422 0.3581197 0.4031373 0.4038462
#> [2,] 0.3686306 0.3764706 0.3641869 0.3474218 0.4297386 0.4132997
#> [3,] 0.3933764 0.3582215 0.2842960 0.2729659 0.3705933 0.3204904
#> [4,] 0.3424296 0.3800648 0.3422645 0.3417722 0.3878583 0.3823287
#> [5,] 0.3909014 0.3858203 0.3493151 0.3142123 0.4148936 0.3838631
#> [6,] 0.3259494 0.3466440 0.3212389 0.3312388 0.3469055 0.3327815
#> [,25] [,26] [,27] [,28] [,29]
#> [1,] 0.4668630 0.3491525 0.3667482 0.3673820 0.4131642
#> [2,] 0.4375000 0.3766122 0.3549518 0.3470437 0.4271318
#> [3,] 0.3851295 0.3144816 0.3289474 0.2942255 0.3732681
#> [4,] 0.4591915 0.3112640 0.3524229 0.3477157 0.4026080
#> [5,] 0.4982505 0.3713178 0.3520140 0.3539604 0.3933447
#> [6,] 0.3809148 0.3279797 0.3393162 0.3135739 0.3718506
We can then visualize the result with the plotMatrix
function.
- positive status
plotMatrix(result, status = "positive", method = "circle")
- negative status
plotMatrix(result, status = "negative", method = "circle")
- dark status
plotMatrix(result, status = "dark", method = "circle")
We can see that there is an obvious positive connection in this system.
Pattern causality effect
Once we have the matrices, we can compute the total effect that each series receives and exerts in the system. The pcEffect function is provided for this purpose.
effects <- pcEffect(result)
print(effects)
#> $positive
#> received exerted Diff
#> X3M 1129.5512 1136.2993 -6.7480867
#> American.Express 1109.8869 1164.8310 -54.9440870
#> Apple 992.1580 1023.7801 -31.6221209
#> Boeing 1058.0608 1120.4791 -62.4182345
#> Caterpillar 1133.2671 1146.7383 -13.4712060
#> Chevron 1031.1565 1044.2211 -13.0646022
#> Cisco.Systems 1042.7957 1079.4614 -36.6656962
#> Coca.Cola 1033.7264 992.8883 40.8380558
#> DowDuPont 1111.3344 1112.0656 -0.7312588
#> ExxonMobil 1062.4768 989.9327 72.5440673
#> General.Electric 1105.2920 1126.1430 -20.8510095
#> Goldman.Sachs 1037.5409 1146.9424 -109.4014643
#> IBM 1121.0063 1079.0178 41.9884669
#> Intel 1069.6753 1064.0050 5.6702996
#> Johnson...Johnson 1086.8271 969.1108 117.7163617
#> JPMorgan.Chase 1102.6635 1104.8011 -2.1376396
#> McDonald.s 990.2739 1024.9226 -34.6487534
#> Merck 969.0533 1016.5654 -47.5120847
#> Microsoft 1091.7622 1038.9909 52.7713246
#> Nike 1023.2977 1022.6086 0.6891073
#> Pfizer 1012.0584 998.4551 13.6033180
#> Procter...Gamble 986.9884 925.6683 61.3201536
#> The.Home.Depot 1093.7925 1104.2273 -10.4347289
#> Travelers 1082.7487 1039.6915 43.0572732
#> United.Technologies 1110.7435 1148.4401 -37.6965110
#> UnitedHealth.Group 892.9267 951.1590 -58.2322811
#> Verizon 1033.1636 993.5064 39.6571690
#> Walmart 1051.3388 953.3171 98.0216941
#> Walt.Disney 1078.3198 1125.6173 -47.2975263
#>
#> $negative
#> received exerted Diff
#> X3M 373.8818 353.3466 20.535222
#> American.Express 373.7054 345.4601 28.245358
#> Apple 473.9539 432.2883 41.665599
#> Boeing 401.8412 358.2462 43.595028
#> Caterpillar 365.3584 345.8385 19.519878
#> Chevron 421.3270 419.1647 2.162316
#> Cisco.Systems 408.5475 400.0414 8.506160
#> Coca.Cola 411.4090 451.9012 -40.492138
#> DowDuPont 388.5262 373.7321 14.794158
#> ExxonMobil 412.9172 463.7885 -50.871308
#> General.Electric 355.7266 378.4351 -22.708502
#> Goldman.Sachs 422.4826 359.7218 62.760815
#> IBM 363.1531 418.7563 -55.603273
#> Intel 399.7430 403.0307 -3.287757
#> Johnson...Johnson 399.0242 478.5350 -79.510839
#> JPMorgan.Chase 375.9222 369.4981 6.424116
#> McDonald.s 458.5950 431.6724 26.922563
#> Merck 435.4795 446.8203 -11.340824
#> Microsoft 403.6935 437.4062 -33.712621
#> Nike 451.1064 412.0858 39.020546
#> Pfizer 426.8867 436.6976 -9.810898
#> Procter...Gamble 450.7500 510.3111 -59.561017
#> The.Home.Depot 400.3182 381.2213 19.096924
#> Travelers 402.6511 435.1085 -32.457437
#> United.Technologies 386.1009 347.3862 38.714688
#> UnitedHealth.Group 509.4764 470.3607 39.115667
#> Verizon 438.1047 432.3609 5.743730
#> Walmart 414.9423 484.7629 -69.820578
#> Walt.Disney 413.8462 361.4918 52.354423
#>
#> $dark
#> received exerted Diff
#> X3M 1296.567 1310.354 -13.7871351
#> American.Express 1316.408 1289.709 26.6987292
#> Apple 1333.888 1343.932 -10.0434786
#> Boeing 1340.098 1321.275 18.8232064
#> Caterpillar 1301.375 1307.423 -6.0486719
#> Chevron 1347.516 1336.614 10.9022862
#> Cisco.Systems 1348.657 1320.497 28.1595363
#> Coca.Cola 1354.865 1355.211 -0.3459181
#> DowDuPont 1300.139 1314.202 -14.0628991
#> ExxonMobil 1324.606 1346.279 -21.6727588
#> General.Electric 1338.981 1295.422 43.5595115
#> Goldman.Sachs 1339.976 1293.336 46.6406492
#> IBM 1315.841 1302.226 13.6148066
#> Intel 1330.582 1332.964 -2.3825429
#> Johnson...Johnson 1314.149 1352.354 -38.2055223
#> JPMorgan.Chase 1321.414 1325.701 -4.2864766
#> McDonald.s 1351.131 1343.405 7.7261906
#> Merck 1395.467 1336.614 58.8529086
#> Microsoft 1304.544 1323.603 -19.0587033
#> Nike 1325.596 1365.306 -39.7096535
#> Pfizer 1361.055 1364.847 -3.7924203
#> Procter...Gamble 1362.262 1364.021 -1.7591371
#> The.Home.Depot 1305.889 1314.551 -8.6621955
#> Travelers 1314.600 1325.200 -10.5998366
#> United.Technologies 1303.156 1304.174 -1.0181768
#> UnitedHealth.Group 1397.597 1378.480 19.1166144
#> Verizon 1328.732 1374.133 -45.4008993
#> Walmart 1333.719 1361.920 -28.2011159
#> Walt.Disney 1307.834 1312.891 -5.0568971
#>
#> $items
#> [1] "X3M" "American.Express" "Apple"
#> [4] "Boeing" "Caterpillar" "Chevron"
#> [7] "Cisco.Systems" "Coca.Cola" "DowDuPont"
#> [10] "ExxonMobil" "General.Electric" "Goldman.Sachs"
#> [13] "IBM" "Intel" "Johnson...Johnson"
#> [16] "JPMorgan.Chase" "McDonald.s" "Merck"
#> [19] "Microsoft" "Nike" "Pfizer"
#> [22] "Procter...Gamble" "The.Home.Depot" "Travelers"
#> [25] "United.Technologies" "UnitedHealth.Group" "Verizon"
#> [28] "Walmart" "Walt.Disney"
Finally, we can visualize the total effect of pattern causality with the plotEffect function.
plotEffect(effects, "negative",TRUE)