Goutte - $crawler->filter() から内部の値を取得する


PHP 7.1.33 と "fabpot/goutte": "^3.2" を使用しています。私の composer ファイルは次のようになります。

    "name": "ubuntu/workspace",
    "require": {
        "fabpot/goutte": "^3.2"
    "authors": [
            "name": "admin",
            "email": "admin@admin.com"



require 'vendor/autoload.php';

use Goutte\Client;
use Symfony\Component\DomCrawler\Crawler;

 * Crawls Detail Calender
 * Does NOT also include wanted Date in the final result set
 * @param $wantedDate
 * @return array
function updateCalendarDetailsData($wantedDate)
    try {
        $client = new Client();

        $x = 1;
        $LIMIT = 3;
        global $x;
        global $LIMIT;
        $res1Array = array();

        $ffUrlArr = ["https://www.forexfactory.com/calendar.php?month=Jan2020"];
        foreach ($ffUrlArr as $key => $v) {

            try {
                $crawler = $client->request('GET', $ffUrlArr[$key]);
            } catch (\Exception $ex) {

            $TEMP = array();

            // $count = $crawler->filter('.calendar_row')->count();
            // $i = 1; // count starts at 1
            $nodeDate = date('Y-m-d');
            $crawler->filter('.calendar_row')->each(function ($node) use (&$res1Array, $wantedDate, $nodeDate) { // $count, $i,
                $EVENT = array();

                // check date for month
                $dayMonth = str_split(explode(" ", trim($node->getNode(0)->nodeValue))[0], 3);
                $day = explode(" ", trim($node->getNode(0)->nodeValue))[1];
                if (is_numeric($day)) {
                    $nodeDate = date("Y-m-d H:i:s", strtotime($dayMonth[0] . " " . $dayMonth[1] . " " . $day));

                // return if wanted date is reached
                if (date("Y-m-d", strtotime($nodeDate)) == date("Y-m-d", strtotime($wantedDate))) {
                    return $res1Array;

                $EVENTID = $node->attr('data-eventid');

                $API_RESPONSE = file_get_contents('https://www.forexfactory.com/flex.php?do=ajax&contentType=Content&flex=calendar_mainCal&details=' . $EVENTID);

                $API_RESPONSE = str_replace("<![CDATA[", "", $API_RESPONSE);
                $API_RESPONSE = str_replace("]]>", "", $API_RESPONSE);

                $html = <<<HTML
<!DOCTYPE html>

                $subcrawler = new Crawler($html);

                $subcrawler->filter('.calendarspecs__spec')->each(function ($LEFT_TD) use (&$res1Array, &$TEMP, &$EVENT) {

                    $LEFT_TD_INNER_TEXT = trim($LEFT_TD->text());

                    if ($LEFT_TD_INNER_TEXT == "Source") {

                        $TEMP = array();
                        $LEFT_TD->nextAll()->filter('a')->each(function ($LINK) use (&$TEMP) {
                            array_push($TEMP, $LINK->text(), $LINK->attr('href'));

                        $EVENT['sourceTEXT'] = $TEMP[0];
                        $EVENT['sourceURL'] = $TEMP[1];
                        $EVENT['latestURL'] = $TEMP[3];

                    if ($LEFT_TD_INNER_TEXT == "Measures") {
                        $EVENT['measures'] = $LEFT_TD->nextAll()->text();

                    if ($LEFT_TD_INNER_TEXT == "Usual Effect") {
                        $EVENT['usual_effect'] = $LEFT_TD->nextAll()->text();

                    if ($LEFT_TD_INNER_TEXT == "Frequency") {
                        $EVENT['frequency'] = $LEFT_TD->nextAll()->text();

                    if ($LEFT_TD_INNER_TEXT == "Why Traders") {
                        $EVENT['why_traders_care'] = $LEFT_TD->nextAll()->text();

                    if ($LEFT_TD_INNER_TEXT == "Derived Via") {
                        $EVENT['derived_via'] = $LEFT_TD->nextAll()->text();
                        // array_push($res1Array, $EVENT); // <---- HERE I GET THE ERROR!
                if ($i > $count) {
                    echo "<pre>";
                    echo "</pre>";
    } catch (\Exception $ex) {
    return $res1Array;



    sourceTEXT => "test", 
    sourceURL => "test",
    latestURL => "test", 
    measures => "test",
    usual_effect => "test",
    derived_via => "test",
    why_traders_care => "test",
    frequency => "test"
    sourceTEXT => "test1", 
    sourceURL => "test1",
    latestURL => "test1", 
    measures => "test1",
    usual_effect => "test1",
    derived_via => "test1",
    why_traders_care => "test1",
    frequency => "test1"
    sourceTEXT => "test2", 
    sourceURL => "test2",
    latestURL => "test2", 
    measures => "test2",
    usual_effect => "test2",
    derived_via => "test2",
    why_traders_care => "test2",
    frequency => "test2"
// ... 




@tftd さんのスクリプトを "fabpot/goutte": "^4.0" で実行しましたが、次のような結果になりました。

array(94) {
  [0] =>
  array(10) {
    'eventId' =>
    string(6) "114340"
    'date' =>
    string(10) "2020-01-01"
    'sourceTEXT' =>
    'sourceURL' =>
    'latestURL' =>
    'measures' =>
    'usual_effect' =>
    'derived_via' =>
    'why_traders_care' =>
    'frequency' =>
  [1] =>
  array(10) {
    'eventId' =>
    string(6) "114341"
    'date' =>
    string(10) "2020-01-01"
    'sourceTEXT' =>
    'sourceURL' =>
    'latestURL' =>
    'measures' =>
    'usual_effect' =>
    'derived_via' =>
    'why_traders_care' =>
    'frequency' =>
  [2] =>
  array(10) {
    'eventId' =>
    string(6) "114342"
    'date' =>
    string(10) "2020-01-01"
    'sourceTEXT' =>
    'sourceURL' =>
    'latestURL' =>
    'measures' =>
    'usual_effect' =>
    'derived_via' =>
    'why_traders_care' =>
    'frequency' =>
  [3] =>
  array(10) {
    'eventId' =>
    string(6) "114343"
    'date' =>
    string(10) "2020-01-01"
    'sourceTEXT' =>
    'sourceURL' =>
    'latestURL' =>
    'measures' =>
    'usual_effect' =>
    'derived_via' =>
    'why_traders_care' =>
    'frequency' =>
  [4] =>
  array(10) {
    'eventId' =>
    string(6) "114328"
    'date' =>
    string(10) "2020-01-01"
    'sourceTEXT' =>
    'sourceURL' =>
    'latestURL' =>
    'measures' =>
    'usual_effect' =>
    'derived_via' =>
    'why_traders_care' =>
    'frequency' =>
  [5] =>
  array(10) {
    'eventId' =>
    string(6) "113632"
    'date' =>
    string(10) "2020-01-01"
    'sourceTEXT' =>
    'sourceURL' =>
    'latestURL' =>
    'measures' =>
    'usual_effect' =>
    'derived_via' =>
    'why_traders_care' =>
    'frequency' =>
  [6] =>
  array(10) {
    'eventId' =>
    string(6) "114308"
    'date' =>
    string(10) "2020-01-01"
    'sourceTEXT' =>
    'sourceURL' =>
    'latestURL' =>
    'measures' =>
    'usual_effect' =>
    'derived_via' =>
    'why_traders_care' =>
    'frequency' =>
// ...



@tftd 今日から将来の特定の日付まで(now() - 2020/01-18)のテーブル行を解析したいです(上記の例は最初から特定の日付までを処理するため、これには当てはまりませんが、不要な行はスキップできます)。私の大きな問題は、空の配列が返されることです。完全に動作する例を提供してください。

@Anna.Klee さん、あなたの質問の「更新」部分に関して:使用しているリンクが本物の API であり、何らかのテスト用エンドポイントではないと確信していますか?本物の API エンドポイントには通常、何らかの認証情報が必要です。また、「forexfactory.com/flex.php ...」から返されるフィールドの多くが空であるだけでなく、「forexfactory.com/calendar.php?month=Jan2020」にアクセスしたときにブラウザに表示される内容も、file_get_contents() や $client->request() で取得できる内容とは異なります。たとえば、イベントID 113606





 $parser = new CalendarParser(date_create());


   date_create_from_format('Y-m-d H:i:s', '2020-01-01 00:00:00'),
   date_create_from_format('Y-m-d H:i:s', '2020-01-02 23:59:59')


<!-- language: lang-none -->

array(22) { 
  [0] => array(10) {
    'eventId' => string(6) "114340"
    'date' => string(10) "2020-01-01"
    'sourceTEXT' => NULL
    'sourceURL' => NULL
    'latestURL' => NULL
    'measures' => NULL
    'usual_effect' => NULL
    'derived_via' => NULL
    'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
    'frequency' => NULL
  [1] => array(10) {
    'eventId' => string(6) "114341"
    'date' => string(10) "2020-01-01"
    'sourceTEXT' => NULL
    'sourceURL' => NULL
    'latestURL' => NULL
    'measures' => NULL
    'usual_effect' => NULL
    'derived_via' => NULL
    'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
    'frequency' => NULL
  [2] => array(10) {
    'eventId' => string(6) "114342"
    'date' => string(10) "2020-01-01"
    'sourceTEXT' => NULL
    'sourceURL' => NULL
    'latestURL' => NULL
    'measures' => NULL
    'usual_effect' => NULL
    'derived_via' => NULL
    'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
    'frequency' => NULL
  [3] => array(10) {
    'eventId' => string(6) "114343"
    'date' => string(10) "2020-01-01"
    'sourceTEXT' => NULL
    'sourceURL' => NULL
    'latestURL' => NULL
    'measures' => NULL
    'usual_effect' => NULL
    'derived_via' => NULL
    'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
    'frequency' => NULL
  [4] => array(10) {
    'eventId' => string(6) "114328"
    'date' => string(10) "2020-01-01"
    'sourceTEXT' => NULL
    'sourceURL' => NULL
    'latestURL' => NULL
    'measures' => NULL
    'usual_effect' => NULL
    'derived_via' => NULL
    'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
    'frequency' => NULL
  [5] => array(10) {
    'eventId' => string(6) "113632"
    'date' => string(10) "2020-01-01"
    'sourceTEXT' => NULL
    'sourceURL' => NULL
    'latestURL' => NULL
    'measures' => NULL
    'usual_effect' => NULL
    'derived_via' => NULL
    'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
    'frequency' => NULL
  [6] => array(10) {
    'eventId' => string(6) "114308"
    'date' => string(10) "2020-01-01"
    'sourceTEXT' => NULL
    'sourceURL' => NULL
    'latestURL' => NULL
    'measures' => NULL
    'usual_effect' => NULL
    'derived_via' => NULL
    'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
    'frequency' => NULL
  [7] => array(10) {
    'eventId' => string(6) "113607"
    'date' => string(10) "2020-01-01"
    'sourceTEXT' => NULL
    'sourceURL' => NULL
    'latestURL' => NULL
    'measures' => NULL
    'usual_effect' => NULL
    'derived_via' => NULL
    'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
    'frequency' => NULL
  [8] => array(10) {
    'eventId' => string(6) "113816"
    'date' => string(10) "2020-01-01"
    'sourceTEXT' => NULL
    'sourceURL' => NULL
    'latestURL' => NULL
    'measures' => NULL
    'usual_effect' => NULL
    'derived_via' => NULL
    'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
    'frequency' => NULL
  [9] => array(10) {
    'eventId' => string(6) "114718"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => string(25) "Reserve Bank of Australia"
    'sourceURL' => string(21) "http://www.rba.gov.au"
    'latestURL' => string(65) "http://www.rba.gov.au/statistics/frequency/commodity-prices/2019/"
    'measures' => string(52) "Change in the selling price of exported commodities;"
    'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
    'derived_via' => string(120) "The average selling price of the nation's main commodity exports are sampled and then compared to the previous sampling;"
    'why_traders_care' => string(128) "It's a leading indicator of the nation's trade balance with other countries because rising commodity prices boost export income;"
    'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
  [10] => array(10) {
    'eventId' => string(6) "114344"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => NULL
    'sourceURL' => NULL
    'latestURL' => NULL
    'measures' => NULL
    'usual_effect' => NULL
    'derived_via' => NULL
    'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
    'frequency' => NULL
  [11] => array(10) {
    'eventId' => string(6) "111383"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => string(6) "Markit"
    'sourceURL' => string(30) "http://www.markiteconomics.com"
    'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
    'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
    'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
    'derived_via' => string(204) "Survey of about 400 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
    'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
    'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
  [12] => array(10) {
    'eventId' => string(6) "111382"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => string(6) "Markit"
    'sourceURL' => string(30) "http://www.markiteconomics.com"
    'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
    'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
    'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
    'derived_via' => string(204) "Survey of about 450 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
    'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
    'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
  [13] => array(10) {
    'eventId' => string(6) "111379"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => string(6) "Markit"
    'sourceURL' => string(30) "http://www.markiteconomics.com"
    'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
    'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
    'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
    'derived_via' => string(204) "Survey of about 750 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
    'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
    'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
  [14] => array(10) {
    'eventId' => string(6) "111380"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => string(6) "Markit"
    'sourceURL' => string(30) "http://www.markiteconomics.com"
    'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
    'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
    'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
    'derived_via' => string(204) "Survey of about 800 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
    'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
    'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
  [15] => array(10) {
    'eventId' => string(6) "111381"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => string(6) "Markit"
    'sourceURL' => string(30) "http://www.markiteconomics.com"
    'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
    'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
    'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
    'derived_via' => string(205) "Survey of about 5000 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
    'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
    'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
  [16] => array(10) {
    'eventId' => string(6) "111397"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => string(6) "Markit"
    'sourceURL' => string(30) "http://www.markiteconomics.com"
    'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
    'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
    'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
    'derived_via' => string(204) "Survey of about 650 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
    'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
    'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
  [17] => array(10) {
    'eventId' => string(6) "111102"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => string(34) "Challenger, Gray & Christmas, Inc."
    'sourceURL' => string(30) "http://www.challengergray.com/"
    'latestURL' => string(50) "http://www.challengergray.com/press/press-releases"
    'measures' => string(56) "Change in the number of job cuts announced by employers;"
    'usual_effect' => string(51) "'Actual' less than 'Forecast' is good for currency;"
    'derived_via' => NULL
    'why_traders_care' => NULL
    'frequency' => string(52) "Released monthly, about 3 days after the month ends;"
  [18] => array(10) {
    'eventId' => string(6) "110766"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => string(19) "Department of Labor"
    'sourceURL' => string(18) "http://www.dol.gov"
    'latestURL' => string(20) "https://www.dol.gov/"
    'measures' => string(103) "The number of individuals who filed for unemployment insurance for the first time during the past week;"
    'usual_effect' => string(51) "'Actual' less than 'Forecast' is good for currency;"
    'derived_via' => NULL
    'why_traders_care' => string(306) "Although it's generally viewed as a lagging indicator, the number of unemployed people is an important signal of overall economic health because consumer spending is highly correlated with labor-market conditions. Unemployment is also a major consideration for those steering the country's monetary policy;"
    'frequency' => string(44) "Released weekly, 5 days after the week ends;"
  [19] => array(10) {
    'eventId' => string(6) "113642"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => string(6) "Markit"
    'sourceURL' => string(30) "http://www.markiteconomics.com"
    'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
    'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
    'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
    'derived_via' => string(204) "Survey of about 400 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
    'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
    'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
  [20] => array(10) {
    'eventId' => string(6) "111392"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => string(6) "Markit"
    'sourceURL' => string(30) "http://www.markiteconomics.com"
    'latestURL' => string(72) "https://www.markiteconomics.com/Public/Release/PressReleases?language=en"
    'measures' => string(95) "Level of a diffusion index based on surveyed purchasing managers in the manufacturing industry;"
    'usual_effect' => string(54) "'Actual' greater than 'Forecast' is good for currency;"
    'derived_via' => string(204) "Survey of about 800 purchasing managers which asks respondents to rate the relative level of business conditions including employment, production, new orders, prices, supplier deliveries, and inventories;"
    'why_traders_care' => string(213) "It's a leading indicator of economic health - businesses react quickly to market conditions, and their purchasing managers hold perhaps the most current and relevant insight into the company's view of the economy;"
    'frequency' => string(65) "Released monthly, on the first business day after the month ends;"
  [21] => array(10) {
    'eventId' => string(6) "113817"
    'date' => string(10) "2020-01-02"
    'sourceTEXT' => NULL
    'sourceURL' => NULL
    'latestURL' => NULL
    'measures' => NULL
    'usual_effect' => NULL
    'derived_via' => NULL
    'why_traders_care' => string(230) "Banks facilitate the majority of foreign exchange volume. When they are closed the market is less liquid and speculators become a more dominant market influence. This can lead to both abnormally low and abnormally high volatility;"
    'frequency' => NULL



require 'vendor/autoload.php';

use Goutte\Client;
use Symfony\Component\DomCrawler\Crawler;

 * Thinking OOP is easier for me.
 * You can easily restructure this into a `functional` code if that's what you need.
class CalendarParser

    const BASE_URL = 'https://www.forexfactory.com/calendar.php?month=%s';
    const EVENT_URL = 'https://www.forexfactory.com/flex.php?do=ajax&contentType=Content&flex=calendar_mainCal&details=%d';

     * @var
    private $client;

     * @var DateTime
    private $calendarMonth;

     * @var Crawler
    private $page;

     * @var Crawler
    private $table;

     * @var array
    private $dateIndexes;

     * CalendarParser constructor.
     * @param DateTime $calendarMonth
     * @throws Exception
    public function __construct(DateTime $calendarMonth)
        $this->client = new Client();
        $this->calendarMonth = $calendarMonth;

        // Fetch page and table data and store it so we can iterate over it.
        $this->page = $this->client->request('GET', sprintf(self::BASE_URL, $this->calendarMonth->format('MY')));
        $this->table = $this->page->filter('.calendar_row');

        // Get date indexes

     * The table uses a class called `newday` at each new date which can be used to create an index of
     * where the date records begin which makes parsing easier.
    private function generateDateIndexes()
        $dateIndexes = [];

        $previousDate = null;
             * NOTE: This is a closure function which will be called until the foreach completes.
             *       You cannot break out of it like when you do `foreach() { break; }`.
             *       If you do `return` - it will simply skip executing the rest of the function but won't break the cycle.
            ->each(function (Crawler $node, $index) use (&$dateIndexes, &$previousDate) {
                $isNewDateSeparator = strpos($node->getNode(0)->getAttribute('class'), 'newday') !== false;

                if ($isNewDateSeparator) {
                    // Convert the date to `Jan-1-STARTING_YEAR` to be easier to search in the array.
                    $dateColumnNode = $node->filter('.date > span > span');
                    $stringDate = str_replace(' ', '-', $dateColumnNode->text()) . '-' . $this->calendarMonth->format('Y');
                    $date = date_create_from_format('M-d-Y', $stringDate);
                    $formattedDate = $date->format('Y-m-d');

                    $dateIndexes[$formattedDate] = [
                        'start' => $index,
                        'end'   => null

                    if ($previousDate) {
                        $dateIndexes[$previousDate]['end'] = ($index - 1);

                    $previousDate = $formattedDate;

        $this->dateIndexes = $dateIndexes;

     * @param Crawler $row
     * @return array
    private function processEvent(DateTime $date, Crawler $row)
        $eventId = $row->attr('data-eventid');

        $event = [
            'eventId'          => $eventId,
            'date'             => $date->format('Y-m-d'),
            'sourceTEXT'       => null,
            'sourceURL'        => null,
            'latestURL'        => null,
            'measures'         => null,
            'usual_effect'     => null,
            'derived_via'      => null,
            'why_traders_care' => null,
            'frequency'        => null

        $content = $this->client->request('GET', sprintf(self::EVENT_URL, $eventId))->html();
        $crawler = new Crawler($content, null, null);

        $table = $crawler->filter('.calendarspecs__spec')->first()->closest('table');

              ->each(function (Crawler $tr) use (&$event) {
                  $label = $tr->filter('.calendarspecs__spec')->text();

                  $description = $tr->filter('.calendarspecs__specdescription');

                  if ($label === 'Source') {
                      $TEMP = [];
                      $description->filter(' a')
                                  ->each(function ($link) use (&$TEMP) {
                                      array_push($TEMP, $link->text(), $link->attr('href'));

                      $event['sourceTEXT'] = $TEMP[0];
                      $event['sourceURL'] = $TEMP[1];
                      $event['latestURL'] = $TEMP[3];

                  if ($label == "Measures") {
                      $event['measures'] = $description->text();

                  if ($label == "Usual Effect") {
                      $event['usual_effect'] = $description->text();

                  if ($label == "Frequency") {
                      $event['frequency'] = $description->text();

                  // this is how it's returned.
                  if ($label == "Why TradersCare") {
                      $event['why_traders_care'] = $description->text();

                  if ($label == "Derived Via") {
                      $event['derived_via'] = $description->text();


        return $event;

     * Get the events between a start and end date.
     * If no endDate is defined - then it will get all events since $startDate.
     * @param DateTime $startDate
     * @param DateTime|null $endDate
     * @return array
    public function getEventsBetweenDates(DateTime $startDate, DateTime $endDate = null)
        $events = [];

        $totalCalendarRows = $this->table->count();
        foreach ($this->dateIndexes as $stringDate => $range) {
            $date = date_create_from_format('Y-m-d', $stringDate);

            // Process only the range from the start date
            if ($date >= $startDate) {
                // and break early when we reach the end.
                if ($endDate && $date > $endDate) {

                // collect and process events for the current date
                $start = $range['start'];
                $end = $range['end'] !== null ? $range['end'] : $totalCalendarRows;
                for ($i = $start; $i < $end; $i++) {
                    $events[] = $this->processEvent($date, new Crawler($this->table->getNode($i)));

        return $events;


$parser = new CalendarParser(date_create());

        date_create_from_format('Y-m-d H:i:s', '2020-01-01 00:00:00'),
        date_create_from_format('Y-m-d H:i:s', '2020-01-02 23:59:59')

ねえ、私はあなたの回答に反対票を投じていません!とても良い回答だと思います!ただし、クラスを実行すると、Uncaught Error: Call to undefined method Symfony\Component\DomCrawler\Crawler::closest() が表示されます。お使いのライブラリのバージョンは何ですか?

私は php 7.1 を使用しています。

私のコメントは反対票を投じた人に向けたもので、特にあなたに向けたものではありません。私見ですが、何が間違っていると思うのかを指摘せずに反対票を投じるのは良くないと思います。私は現在 php 7.3 を使用していますが、問題ありません。あなたは古いバージョンの fabpot/goutte を使用している可能性があります。私のものは v4.0.0 です。編集:あなたの質問に ^3.2 を使用していると書かれていることに気づきました。4.0 に更新できますか?


fabpot/goutte@3.3.0 は、symfony/dom-crawler パッケージ(Crawler クラスの提供元)のバージョン ^4.4 または ^5.0 のいずれかを必要とします。この機能は両方のリリースに存在します(リンクを確認してください)。あなたの環境側で何か問題があるのではないかと思います。私は両方のバージョンで試してみましたが、どちらも動作しました。composer show -i で実際に何がインストールされているか確認してください。



// return if wanted date is reached
if (date("Y-m-d", strtotime($nodeDate)) == date("Y-m-d", strtotime($wantedDate))) {
  return $res1Array;









require 'vendor/autoload.php';

// use Goutte\Client;
use Symfony\Component\DomCrawler\Crawler;

 * Crawls Detail Calender
 * Does NOT also include wanted Date in the final result set
 * @param $wantedDate
 * @return array
function updateCalendarDetailsData($wantedDate)
{
    // Walks the event rows of each calendar page in $ffUrlArr, fetches the
    // detail fragment of every event and collects its specs. The walk stops
    // at the first row whose date equals $wantedDate, so that date itself is
    // not part of the result.
    //
    // $wantedDate must offer ->format() (e.g. a DateTime).
    // Returns a list of event arrays with the keys: id, date, sourceTEXT,
    // sourceURL, latestURL, measures, usual_effect, derived_via,
    // why_traders_care, frequency. Details an event does not have stay null.
    // If an exception interrupts the crawl, the events collected so far are
    // returned.

    // *** small optimizations
    $Year = $wantedDate->format("Y");
    $wantedDateStr = $wantedDate->format("Y M j");

    // Spec label as it appears in the detail fragment => key in the event array.
    // *** BUG 3: as tftd noticed, the question had an issue with the name of
    // *** the "Why TradersCare" field.
    $specFields = [
        'Measures'        => 'measures',
        'Usual Effect'    => 'usual_effect',
        'Frequency'       => 'frequency',
        'Why TradersCare' => 'why_traders_care',
        'Derived Via'     => 'derived_via',
    ];

    // Defined before the try block so the final return always has a value.
    $res1Array = array();

    try {
        // *** Goutte's Client was only used to download the page, so the
        // *** package is not needed: file_get_contents() + Crawler is enough.

        // *** There is only one link in $ffUrlArr, so the foreach() could be
        // *** dropped. But for now - let it be.
        $ffUrlArr = ["https://www.forexfactory.com/calendar.php?month=Jan2020"];
        foreach ($ffUrlArr as $ffUrl) {
            try {
                $pageHtml = file_get_contents($ffUrl);
                if ($pageHtml === false) {
                    // Download failed (PHP already emitted a warning); skip this page.
                    continue;
                }
                $crawler = new Crawler($pageHtml);
            } catch (\Exception $ex) {
                // NOTE(review): the original handler body is not visible in the
                // excerpt; log and move on rather than continue without $crawler.
                error_log('updateCalendarDetailsData: could not load ' . $ffUrl . ': ' . $ex->getMessage());
                continue;
            }

            // *** No need to pre-fill this with date('Y-m-d').
            $nodeDate = "";

            // *** BUG 1: the original ->each() closure captured $nodeDate
            // *** without "&", so the date never carried over between rows.
            // *** A plain foreach() avoids that and, unlike ->each(), can be
            // *** left early once $wantedDate is reached.
            // *** Selecting '[data-eventid][data-touchable]' instead of
            // *** '.calendar_row' avoids multiple requests to the forexfactory
            // *** API with the same $EVENTID.
            foreach ($crawler->filter('[data-eventid][data-touchable]') as $DOM_el) {
                $node = new Crawler($DOM_el);

                // *** A cleaner and simpler way to retrieve the date from this
                // *** html: build $nodeDate in the form "Y M j" (e.g. "2020 Jan 1").
                // Rows without a date cell keep the date of the previous row.
                $date_node = $node->filter('.date > span > span');
                if ($date_node->count() != 0) {
                    // trim() keeps the comparison below independent of any
                    // whitespace surrounding the cell text.
                    $nodeDate = $Year . " " . trim($date_node->text());
                }

                // *** There is no need for so many conversions;
                // *** comparing the strings is good enough.
                // *** BUG 2: not critical, but "heavy": because ->each() cannot
                // *** be left early, the "==" check only skipped $wantedDate,
                // *** and all dates after it were still iterated over.
                if ($nodeDate === $wantedDateStr) {
                    // Leave both loops: reaching the wanted date ends the whole
                    // crawl, exactly like the original "return $res1Array;",
                    // while still leaving room for extra logic after the loops.
                    break 2;
                }

                $EVENTID = $node->attr('data-eventid');

                $API_RESPONSE = file_get_contents('https://www.forexfactory.com/flex.php?do=ajax&contentType=Content&flex=calendar_mainCal&details=' . $EVENTID);
                if ($API_RESPONSE === false) {
                    // Detail request failed: still record the event, with null details.
                    $API_RESPONSE = '';
                }

                // Strip the CDATA wrapper so the payload parses as plain HTML.
                $API_RESPONSE = str_replace(["<![CDATA[", "]]>"], "", $API_RESPONSE);

                // NOTE(review): only the first line of this heredoc is present
                // in the excerpt; the wrapper below is reconstructed so that
                // $API_RESPONSE is actually parsed - confirm against the source.
                $html = <<<HTML
<!DOCTYPE html>
<html>
<body>
{$API_RESPONSE}
</body>
</html>
HTML;

                $subcrawler = new Crawler($html);

                // *** Took this part from tftd's answer.
                // *** It's a good practice to define all possible fields.
                $EVENT = [
                    'id'               => $EVENTID,
                    'date'             => $nodeDate,
                    'sourceTEXT'       => null,
                    'sourceURL'        => null,
                    'latestURL'        => null,
                    'measures'         => null,
                    'usual_effect'     => null,
                    'derived_via'      => null,
                    'why_traders_care' => null,
                    'frequency'        => null
                ];

                // *** Once again switching from ->each() to foreach(),
                // *** just for consistency.
                foreach ($subcrawler->filter('.calendarspecs__spec') as $specEl) {
                    $LEFT_TD = new Crawler($specEl);

                    $LEFT_TD_INNER_TEXT = trim($LEFT_TD->text());

                    if ($LEFT_TD_INNER_TEXT === "Source") {
                        // Collect text/href pairs of every link after the label:
                        // [text0, href0, text1, href1, ...].
                        $TEMP = array();
                        $LEFT_TD->nextAll()->filter('a')->each(function ($LINK) use (&$TEMP) {
                            array_push($TEMP, $LINK->text(), $LINK->attr('href'));
                        });

                        // A spec with fewer links than expected leaves the
                        // missing entries null instead of raising a notice.
                        $EVENT['sourceTEXT'] = $TEMP[0] ?? null;
                        $EVENT['sourceURL'] = $TEMP[1] ?? null;
                        $EVENT['latestURL'] = $TEMP[3] ?? null;
                    } elseif (isset($specFields[$LEFT_TD_INNER_TEXT])) {
                        $EVENT[$specFields[$LEFT_TD_INNER_TEXT]] = $LEFT_TD->nextAll()->text();
                    }
                }

                // *** BUG 4: this was the main complication. The push used to
                // *** sit inside the "Derived Via" branch, so
                // *** 1) it was not called if an event had no "Derived Via" field,
                // *** 2) and it was even commented out, so $res1Array was never
                // ***    populated.
                // *** It belongs here, once per event.
                $res1Array[] = $EVENT;
            }
        }
    } catch (\Exception $ex) {
        // NOTE(review): the original handler body is not visible in the excerpt.
        // Log the failure; whatever was collected so far is still returned.
        error_log('updateCalendarDetailsData: crawl aborted: ' . $ex->getMessage());
    }

    return $res1Array;
}
// *** Pass a DateTime rather than a date string: its fields can be read and
// *** manipulated far more easily than those of a string representation.
// *** The earlier, string-based call is kept for comparison:
// var_dump(updateCalendarDetailsData(date("2020-01-02")));
var_dump(
    updateCalendarDetailsData(
        new DateTime("2020-01-02")
    )
);

Licensed under cc by-sa 3.0 with attribution required.