Created
March 13, 2017 06:04
-
-
Save maynagashev/8d5eefc5921f1658b07f7ad397c0a3c6 to your computer and use it in GitHub Desktop.
SmoothData slow versions with Laravel collections
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Other versions | |
/**
 * Smooth a series with a trailing moving average.
 *
 * Every value of $data is replaced by the rounded integer mean of itself and the
 * values that precede it, up to $len values in total (fewer when less history exists).
 * The values of $prevData, if given, are placed in front of $data so that the first
 * points of $data can be averaged over earlier measurements as well.
 *
 * The window is located by the element's position in the series, not by its value,
 * so repeated measurements are averaged over their own neighbourhood. The windows
 * are taken with array_slice() instead of filtering the whole series per element.
 *
 * @param mixed $data     measurements, anything collect() accepts; keys are preserved in the
 *                        result. With self::VERBOSE enabled the keys are passed to date()
 *                        as Unix timestamps.
 * @param int   $len      window size in measurements; a value below 1 cannot define a
 *                        window, the data is then returned unchanged
 * @param array $prevData measurements that precede $data (only their values are used)
 * @return array smoothed values under the keys of $data
 */
public function smoothByMovingAverageUseCollections($data, $len, $prevData = [])
{
    $col = collect($data);

    // No window can be built from a non-positive length: leave the data untouched.
    if ($len < 1) {
        return $col->toArray();
    }

    $prevValues = collect($prevData)->values()->all();
    $prevCount = count($prevValues);
    $values = array_merge($prevValues, $col->values()->all());
    $valuesCount = count($values);

    // The original predicate "$i > $position - $len" selects ceil($len) positions.
    $windowSize = (int) ceil($len);

    $verboseHours = [];
    $verboseTime = 0;
    if (self::VERBOSE) {
        date_default_timezone_set('UTC');
        echo "Previous Data received: ". count($prevData)." records.\nCalculating MOVING AVERAGE for ".$col->count()." records...\n";
    }

    $smoothed = [];
    // Position of the current element inside $values: $data starts right after $prevData.
    $currentPosition = $prevCount;

    foreach ($col->all() as $timestamp => $item) {
        if (self::VERBOSE) {
            // Progress report grouped by the hour the key falls into.
            $vKey = date("Y-m-d H:00", $timestamp);
            if (!array_key_exists($vKey, $verboseHours)) {
                // check for finish prev
                if (count($verboseHours) > 0) {
                    $vKeyPrev = date("Y-m-d H:00", $timestamp - 1);
                    if (array_key_exists($vKeyPrev, $verboseHours)) {
                        $timing = round(microtime(true) - $verboseTime, 1);
                        echo " - finished: " . $verboseHours[$vKeyPrev] . " records, {$timing} sec (" . round($timing / 60, 1) . " min)\n";
                    }
                }
                // start new hour
                echo "Processing: {$vKey} ";
                $verboseHours[$vKey] = 1;
                $verboseTime = microtime(true);
            } else {
                $verboseHours[$vKey]++;
            }
        }

        // Trailing window: up to $windowSize positions ending at the current one (inclusive).
        $windowStart = max(0, $currentPosition - $windowSize + 1);
        $selectedArea = array_slice($values, $windowStart, $currentPosition - $windowStart + 1, true);
        $average = (int) round(array_sum($selectedArea) / count($selectedArea), 0);

        // debug info for different cases: first and last
        if (self::DEBUG && ($currentPosition < $prevCount + 1 || ($currentPosition >= $valuesCount - 1 && $currentPosition < $valuesCount))) {
            dump("Item: {$item} curpos: {$currentPosition} prevData count: ".$prevCount
                ." RESULT average={$average} from selected items:", $selectedArea);
        }

        $smoothed[$timestamp] = $average;
        $currentPosition++;
    }

    return $smoothed;
}
/**
 * Smooth a series with a centred average whose window is shifted at the edges.
 *
 * For an interior element the window spans $len / 2 positions to the left, the
 * element itself and $len / 2 positions to the right. Near the start of the series
 * the positions missing on the left are taken from the right instead, and near the
 * end the positions missing on the right are taken from the left, so that edge
 * windows hold $len values as long as the series is long enough.
 * Each value is replaced by the rounded integer mean of its window.
 *
 * The window is located by the element's position in the series, not by its value,
 * so repeated measurements are averaged over their own neighbourhood.
 *
 * @param mixed $data measurements, anything collect() accepts; keys are preserved in the result
 * @param int   $len  window size in measurements; a value below 1 cannot define a
 *                    window, the data is then returned unchanged
 * @return array smoothed values under the keys of $data
 */
public function smoothByAverage($data, $len)
{
    $col = collect($data);

    // No window can be built from a non-positive length: leave the data untouched.
    if ($len < 1) {
        return $col->toArray();
    }

    $values = $col->values()->all(); // reset keys
    $lastIndex = count($values) - 1;
    $half = $len / 2;

    $smoothed = [];
    $currentPosition = 0;

    foreach ($col->all() as $key => $item) {
        // 1. prepend + current: positions in [$currentPosition - $half, $currentPosition]
        $leftIndex = max(0, (int) ceil($currentPosition - $half));
        $prependCount = $currentPosition - $leftIndex + 1;
        $lackLeft = $len - $prependCount; // for first points will be lack of prepend items

        // 2. append: positions in ($currentPosition, $rightEdge], widened when the left side is short
        $rightEdge = ($lackLeft > $half) ? $currentPosition + $lackLeft : $currentPosition + $half;
        $rightIndex = min($lastIndex, (int) floor($rightEdge));
        $appendCount = $rightIndex - $currentPosition;
        $lackRight = $len - ($prependCount + $appendCount);

        // 3. For last points will be lack of data to the right: extend the left side instead
        if ($lackRight > 0) {
            $leftIndex = max(0, (int) ceil($currentPosition - ($half + $lackRight)));
            $prependCount = $currentPosition - $leftIndex + 1;
        }

        // The window always contains the current position, so it is never empty.
        $selectedArea = array_slice($values, $leftIndex, $rightIndex - $leftIndex + 1);
        $average = (int) round(array_sum($selectedArea) / count($selectedArea), 0);

        // debug info for different cases
        if (self::DEBUG && ($currentPosition < 3 || ($currentPosition >= 30 && $currentPosition < 33) || $currentPosition > 57)) {
            dump("Item: {$item}, "
                ."curpos: {$currentPosition}, prepend: " . $prependCount . ", append: " . $appendCount." "
                ."RESULT average={$average} from selected items:", $selectedArea);
        }

        $smoothed[$key] = $average;
        $currentPosition++;
    }

    return $smoothed;
}
/**
 * Smooth series of measurements in time perspective.
 * 1) Pick up local maximum values (within specified $horizon).
 * 2) Calculate approximated values for a straight line to that maximum.
 * 3) Correct values with taking into account the influence of intermediary values (using $multiplier).
 *
 * $data    = array( timestamp1 => value1, timestamp2 => value2, ...)
 * $horizon = (int) [1-...] - measurements count for look forward (defines local range), not zero integer
 * $effect  = 0 - no effect, no correction (the data is returned unchanged)
 *            1 - straight line to maximum, no influence of current values
 *            values outside of [0, 1] are treated as 1
 *
 * Example: $data = $this->smoothByLocalMaximum($data, 60, 0.5); // horizon of 60 measurements, effect = 50%
 *
 * The keys of $data are lost while the series is cut into chunks and are restored
 * by position at the end, so the result has the same keys in the same order.
 *
 * NOTE(review): processUpChunk() reads and updates $this->prevMax and this method does
 * not reset it, so the last maximum of one call appears to carry over into the next call
 * on the same instance - confirm that this is intended.
 *
 * @param array $data
 * @param int $horizon
 * @param float $effect
 * @return array $ret
 */
public function smoothByLocalMaximum(array $data, $horizon, $effect)
{
    $effect = ($effect < 0 || $effect > 1) ? 1 : $effect;
    $multiplier = (1 - $effect);
    $this->log("Data records: ".count($data).", horizon = {$horizon}, effect=".($effect*100)."% , multiplier = {$multiplier}");

    if ($horizon < 1) {
        return $data; // if not proper horizon value passed - just return untouched data
    }
    if ($multiplier == 1) {
        return $data; // with multiplier = 1 results will be identical
    }

    $col = collect($data)->values(); // convert to collection and reset keys

    // if horizon shorter than array, splice array for chunks by horizon value;
    // splice() keeps every chunk zero-indexed, which processUpChunk() relies on
    $hChunks = collect([]);
    while ($col->count() > $horizon) {
        $hChunks->push($col->splice(0, $horizon));
    }
    $hChunks->push($col); // the remainder (or the whole series when it fits into one horizon)

    // process horizon chunks
    $this->log('Horizon chunks: '.$hChunks->count());
    $hChunks->transform(function ($chunk) use ($multiplier) {
        return $this->processHorizonChunk($chunk, $multiplier);
    });

    // flatten, indexes (timestamps) was lost after splice operations
    $newValues = $hChunks->flatten(1)->toArray();

    // recover timestamp indexes by position
    $ret = [];
    $i = 0;
    foreach ($data as $timestamp => $oldValue) {
        $ret[$timestamp] = $newValues[$i];
        $i++;
    }

    return $ret;
}
/**
 * Cut one horizon chunk into "up chunks" and smooth each of them.
 *
 * An up chunk runs from the start of the remaining values up to and including their
 * maximum (its first occurrence), so the last item of every up chunk is its maximum.
 * The passed collection is consumed: it is empty when this method returns.
 *
 * @param mixed $col        collection of values of one horizon chunk (as built by smoothByLocalMaximum())
 * @param mixed $multiplier passed through to processUpChunk()
 * @return mixed flattened collection of the processed up chunks
 */
private function processHorizonChunk($col, $multiplier)
{
    // splice collection by next max value (upChunks)
    $upChunks = collect([]);
    $i = 0;
    while ($col->count() > 0) {
        $max = $col->max();
        $max_index = $col->search($max);
        $this->log(" + splice ".($max_index+1)." elements to upChunk #{$i}. max_value: ".$max.", max_index: ". $max_index);
        $i++;
        // every pass removes at least one element, so the loop ends with an empty collection
        $upChunks->push($col->splice(0, $max_index + 1));
    }

    // process up chunks
    $this->log('Ascending upChunks: '.$upChunks->count());
    $upChunks->transform(function ($chunk) use ($multiplier) {
        return $this->processUpChunk($chunk, $multiplier);
    });

    return $upChunks->flatten(1);
}
/**
 * Pull the values of one up chunk towards the straight line that leads from the
 * previous maximum ($this->prevMax) to the last item of the chunk.
 *
 * When no previous maximum is known, the first item of the chunk becomes the start of
 * the line and is left unchanged. Afterwards $this->prevMax is set to the last item.
 * The collection is modified in place and returned.
 *
 * NOTE(review): a chunk with a single item is returned before $this->prevMax is
 * touched, so such an item never becomes the previous maximum - confirm this is intended.
 *
 * @param mixed $col        zero-indexed collection; callers pass chunks whose last item is the maximum
 * @param mixed $multiplier share of the distance to the straight line that is taken back (0 = value lands on the line)
 * @return mixed the same collection with the corrected values
 */
private function processUpChunk($col, $multiplier)
{
    // nothing to smooth, just return collection untouched
    if ($col->count() <= 1) {
        return $col;
    }

    // Without a known previous maximum the chunk's first item anchors the line
    // and correction starts from the second element.
    $startsSeries = is_null($this->prevMax);
    if ($startsSeries) {
        $this->prevMax = $col->first();
    }
    $offset = $startsSeries ? 1 : 0;

    // calculate correction per step for straight line
    $base = $this->prevMax;
    $dif = $col->last() - $this->prevMax;
    $steps = $col->count() - $offset;
    $correctionPerStep = $dif / $steps;

    // some debug information
    $this->log("Smooth series from {$this->prevMax} => {$col->last()}.", 1);
    $this->log("Processing upChunk (".$col->count()." records): prevMax = {$this->prevMax}, currentMax = {$col->last()}, offset = {$offset}.");
    $this->log("Difference {$dif}, steps: {$steps}, correctionPerStep: {$correctionPerStep}");

    // save current maximum for next upChunk
    $this->prevMax = $col->last();

    return $col->transform(function ($currentValue, $i) use ($base, $offset, $correctionPerStep, $multiplier) {
        // don't change offset values
        if ($i < $offset) {
            return $currentValue;
        }

        $straightLineValue = $base + $correctionPerStep * ($i + 1 - $offset);
        $delta = $straightLineValue - $currentValue; // distance from the current value to the line
        $influence = $delta * $multiplier;
        $newValue = round($straightLineValue - $influence);

        $roundedStraight = round($straightLineValue, 1);
        $roundedInfluence = round($influence, 1);
        $this->log("Change {$currentValue} => {$newValue} | "
            ."correction: [straightValue: ".$roundedStraight." - influence: ".$roundedInfluence." => {$newValue}] "
            ."delta: $delta, multiplier = ".($multiplier*100)."% => influence: ".$roundedInfluence);

        return $newValue;
    });
}
/**
 * Dump a value for debugging; does nothing unless self::DEBUG is truthy.
 *
 * @param mixed $var       value to dump
 * @param mixed $delimiter when truthy, a separator line is dumped before the value
 */
private function log($var, $delimiter = 0)
{
    if (self::DEBUG) {
        if ($delimiter) {
            dump("-------------------");
        }
        dump($var);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment