From bb3c4ed404d43258510644dd7129741c0d7f7540 Mon Sep 17 00:00:00 2001 From: "venkat.iyer" Date: Thu, 11 Oct 2018 19:21:51 +0900 Subject: [PATCH] external/error_report: Add Infinity wait error reporting feature to TizenRT Add Infinity wait error reporting feature, where the callstacks of threads in a seemingly endless wait are reported to a server. This can also be used to inspect threads that are possibly deadlocked. Following are the summarized details of the feature add-on: 1. Definition of an appropriate Kconfig for the infinity wait feature. Also, turning on active cpu load monitoring when this feature is requested, which is necessary. 2. Implementation of the infinity wait policy under `external/error_report` 3. Replacing all scheduler-based APIs at `external/error_report` with appropriate `ioctls`, and implementing an `os/drivers/error_report` driver to place the scheduler API calls instead. 4. Implementing a scenario for an infinitely waiting thread pair in the sample `error_report_demo` application 5. Moving the `error_report_init` API call to `os_bringup` function from its previous location. 
--- apps/examples/err_report_demo/Makefile | 2 +- .../err_report_demo/err_report_main.c | 76 ++++- external/error_report/Kconfig | 36 ++- external/error_report/error_report.c | 167 +++++++++- external/include/error_report/error_report.h | 30 +- os/arch/arm/src/common/up_initialize.c | 4 - os/drivers/Makefile | 1 + os/drivers/error_report/Make.defs | 29 ++ os/drivers/error_report/error_report_drv.c | 302 ++++++++++++++++++ os/include/tinyara/error_report_internal.h | 76 +++++ os/include/tinyara/fs/ioctl.h | 14 + os/kernel/init/os_bringup.c | 7 + 12 files changed, 716 insertions(+), 28 deletions(-) create mode 100644 os/drivers/error_report/Make.defs create mode 100644 os/drivers/error_report/error_report_drv.c create mode 100644 os/include/tinyara/error_report_internal.h diff --git a/apps/examples/err_report_demo/Makefile b/apps/examples/err_report_demo/Makefile index 9637c84483..429f33b987 100644 --- a/apps/examples/err_report_demo/Makefile +++ b/apps/examples/err_report_demo/Makefile @@ -94,7 +94,7 @@ install: endif -ifeq ($(CONFIG_BUILTIN_APPS)$(CONFIG_EXAMPLES_RT_ERR_REPORT),yy) +ifeq ($(CONFIG_BUILTIN_APPS)$(CONFIG_EXAMPLES_ERR_REPORT),yy) $(BUILTIN_REGISTRY)$(DELIM)$(FUNCNAME).bdat: $(DEPCONFIG) Makefile $(Q) $(call REGISTER,$(APPNAME),$(FUNCNAME),$(THREADEXEC),$(PRIORITY),$(STACKSIZE)) diff --git a/apps/examples/err_report_demo/err_report_main.c b/apps/examples/err_report_demo/err_report_main.c index b212d90380..bdfedc21c2 100644 --- a/apps/examples/err_report_demo/err_report_main.c +++ b/apps/examples/err_report_demo/err_report_main.c @@ -36,6 +36,8 @@ #define RT_MSG(msg) printf("%s: %s\n", __BASE_FILE__, msg) sem_t g_err_sem; +sem_t g_sem1; +sem_t g_sem2; uint8_t g_err_nscenarios; uint8_t g_err_nsuccess; @@ -84,7 +86,7 @@ uint8_t g_err_nsuccess; * WiFi Manager callback prototypes */ static void prv_sta_connected(wifi_manager_result_e); -static void prv_sta_disconnected(void); +static void prv_sta_disconnected(wifi_manager_disconnect_e res); static void 
prv_softap_sta_join(void); static void prv_softap_sta_leave(void); static void prv_scan_done(wifi_manager_scan_info_s **scan_result, wifi_manager_scan_result_e res); @@ -98,7 +100,7 @@ static void prv_sta_connected(wifi_manager_result_e res) ERR_REPORT_TEST_SIGNAL; } -static void prv_sta_disconnected(void) +static void prv_sta_disconnected(wifi_manager_disconnect_e res) { sleep(2); printf(" [RT] T%d --> %s\n", getpid(), __FUNCTION__); @@ -162,7 +164,7 @@ static void error_report_single(const char *endpoint) char readbuf[1024]; int nbytes_read = 0; int nbytes_sent = 0; - + error_report_init(); res = wifi_manager_init(&wifi_callbacks); if (res != WIFI_MANAGER_SUCCESS) { printf(" wifi_manager_init fail\n"); @@ -191,6 +193,7 @@ static void error_report_single(const char *endpoint) wifi_manager_disconnect_ap(); ERR_REPORT_TEST_WAIT; wifi_manager_deinit(); + error_report_deinit(); ERR_REPORT_TC_END_CHECK(nbytes_sent > 0); } @@ -207,6 +210,7 @@ static void error_report_multiple(const char *endpoint) int sock_cnt = 0; int sock_fds[CONFIG_NSOCKET_DESCRIPTORS]; + error_report_init(); res = wifi_manager_init(&wifi_callbacks); if (res != WIFI_MANAGER_SUCCESS) { printf(" wifi_manager_init fail\n"); @@ -259,6 +263,7 @@ static void error_report_multiple(const char *endpoint) wifi_manager_disconnect_ap(); ERR_REPORT_TEST_WAIT; wifi_manager_deinit(); + error_report_deinit(); ERR_REPORT_TC_END_CHECK(nbytes_sent > 0); } @@ -273,6 +278,67 @@ static void error_report_queue_underflow(void) ERR_REPORT_TC_END_CHECK(nbytes_read == 0); } +static int prv_thread1(void *args) +{ + sem_wait(&g_sem1); + sem_post(&g_sem2); + return 0; +} + +static int prv_thread2(void *args) +{ + sem_wait(&g_sem2); + sem_post(&g_sem1); + return 0; +} + +static void error_report_infinity_wait(const char *endpoint) +{ + ERR_REPORT_TC_START; + wifi_manager_result_e res = WIFI_MANAGER_SUCCESS; + wifi_manager_ap_config_s apconfig = { "Gorani", 6, "jonbeo1@", 8, WIFI_MANAGER_AUTH_WPA2_PSK, WIFI_MANAGER_CRYPTO_AES }; + 
pthread_t thread1; + pthread_t thread2; + int r; + int scenario_success = -1; + error_report_init(); + + error_report_start_infinitywait(); + res = wifi_manager_init(&wifi_callbacks); + if (res != WIFI_MANAGER_SUCCESS) { + printf(" wifi_manager_init fail\n"); + goto done; + } + + res = wifi_manager_connect_ap(&apconfig); + if (res != WIFI_MANAGER_SUCCESS) { + printf(" AP connect failed\n"); + goto done; + } + /* Wait for DHCP connection */ + ERR_REPORT_TEST_WAIT; + sem_init(&g_sem1, 0, 0); + sem_init(&g_sem2, 0, 0); + if ((r = pthread_create(&thread1, NULL, (pthread_startroutine_t)prv_thread1, NULL)) != 0) { + goto done; + } + + pthread_create(&thread2, NULL, (pthread_startroutine_t) prv_thread2, NULL); + /* Sleep for a sufficient amount of time, after which the infinity wait should be seen */ + sleep(CONFIG_ERROR_REPORT_INFINITE_CHECK_TIMER * (CONFIG_ERROR_REPORT_INFINITE_CHECK_THRESHOLD + 1)); + scenario_success = 1; +done: + wifi_manager_disconnect_ap(); + ERR_REPORT_TEST_WAIT; + wifi_manager_deinit(); + error_report_deinit(); + pthread_cancel(thread1); + pthread_join(thread1, NULL); + pthread_cancel(thread2); + pthread_join(thread2, NULL); + ERR_REPORT_TC_END_CHECK(scenario_success > 0); +} + static void error_report_test(const char *endpoint) { ERR_REPORT_LOG_START; @@ -285,6 +351,10 @@ static void error_report_test(const char *endpoint) /* Verify multiple errors across WiFi Manager */ error_report_multiple(endpoint); + /* Verify Infinite Wait */ +#ifdef CONFIG_ERROR_REPORT_INFINITE_WAIT + error_report_infinity_wait(endpoint); +#endif ERR_REPORT_PRINT_STATS; ERR_REPORT_LOG_END; } diff --git a/external/error_report/Kconfig b/external/error_report/Kconfig index d1ca9eb179..c696657168 100644 --- a/external/error_report/Kconfig +++ b/external/error_report/Kconfig @@ -63,5 +63,39 @@ config EPORT_ERROR_REPORT default "5001" ---help--- Enter Port of endpoint server where error logs will be gathered - + +config ERROR_REPORT_INFINITE_WAIT + bool "Report error for 
infinitely waiting threads" + default n + select SCHED_CPULOAD + ---help--- + Report error if threads wait infinitely long + +if ERROR_REPORT_INFINITE_WAIT + +config ERROR_REPORT_INFINITE_CHECK_TIMER + int "Timeout in seconds to check for infinite wait" + default 6 + ---help--- + Specify timeout value after which to check for infinite waits + +config ERROR_REPORT_INFINITE_CHECK_THRESHOLD + int "Number of observed interations of thread inactivity" + default 3 + ---help--- + Specify number of iterations of thread inactivity after which to report as error + +config ERROR_REPORT_NTHREADS_IN_WAIT + int "Number of threads (max) to be reported as waiting" + default 8 + ---help--- + Specify number of threads (max) in wait + +config ERROR_REPORT_BACKTRACE_MAX_DEPTH + int "Number of functions to report from callstack" + default 6 + ---help--- + Specify depth of the backtrace + +endif #ERROR_REPORT_INFINITE_WAIT endif # ERROR_REPORT diff --git a/external/error_report/error_report.c b/external/error_report/error_report.c index be49aa87ba..e8388cbc00 100644 --- a/external/error_report/error_report.c +++ b/external/error_report/error_report.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,9 @@ #include #include #include +#include +#include +#include #include #define ERR_IP_ADRR_LEN 16 @@ -43,40 +47,160 @@ typedef struct { uint16_t front; int8_t q_pending; pthread_mutex_t err_mutex; +#ifdef CONFIG_ERROR_REPORT_INFINITE_WAIT + sem_t infinity_loop_exit; + pthread_t infinity_thread; + uint8_t ntasks; +#endif err_fsm_t fsm; } g_error_info_t; static g_error_info_t g_err_info; +static int g_err_report_fd; -static unsigned long prv_fetch_taskaddr(int pid) +#ifdef CONFIG_ERROR_REPORT_INFINITE_WAIT +static void prv_create_infwait_err_rec(int pid, int state, unsigned long task_addr) { - struct tcb_s *tcbptr = sched_gettcb(pid); - if (tcbptr != NULL) { - entry_t e = tcbptr->entry; - if ((tcbptr->flags & TCB_FLAG_TTYPE_MASK) == 
TCB_FLAG_TTYPE_PTHREAD) { - return (unsigned long)e.pthread; - } else { - return (unsigned long)e.main; + infinty_waitdata_t param; + int ret; + param.pid = pid; + ret = ioctl(g_err_report_fd, ERIOC_GET_BACKTRACE, &param); + if (ret < 0) { + nwerrdbg("ioctl failed\n"); + return; + } + error_infwait_data_t send_data; + struct timeval ts; + char sendbuf[ERR_BUFLEN]; + int i; + gettimeofday(&ts, NULL); + send_data.error_type = ERRTYPE_HANGING; + send_data.ncalls = param.ncalls; + send_data.task_state = param.task_state; + send_data.task_addr = param.entry; + for (i = 0; i < CONFIG_ERROR_REPORT_BACKTRACE_MAX_DEPTH; i++) { + send_data.backtrace[i] = param.backtrace[i]; + } + send_data.timestamp.tv_sec = ts.tv_sec; + send_data.timestamp.tv_usec = ts.tv_usec; + if (sizeof(sendbuf) < sizeof(error_infwait_data_t)) { + nwerrdbg("Buffer space inadequate\n"); + return; + } + memcpy(sendbuf, (void *)&send_data, sizeof(error_infwait_data_t)); + sendbuf[sizeof(error_infwait_data_t)] = 0; + /* Send error record to default endpoint */ + error_report_send(0, sendbuf, sizeof(error_infwait_data_t) + 1); +} +#endif + + +static int prv_check_infinite_wait(void *args) +{ + struct timespec ts; + struct timeval t1; + int ret; + + while (1) { + ret = ioctl(g_err_report_fd, ERIOC_CHECK_INFWAIT, NULL); + if (ret < 0) { + nwerr_vdbg("ioctl failed\n"); + goto done; + } + gettimeofday(&t1, NULL); + /* Logic to check every thread state whether it is in WAITSEM or WAITSIG mode */ + nwerr_vdbg("%lu: sem_timedwait for %lu seconds\n", t1.tv_sec, ts.tv_sec); + ts.tv_sec = t1.tv_sec + CONFIG_ERROR_REPORT_INFINITE_CHECK_TIMER; + ts.tv_nsec = t1.tv_usec * 1000; + ret = sem_timedwait(&g_err_info.infinity_loop_exit, &ts); + gettimeofday(&t1, NULL); + nwerr_vdbg("%lu: sem_timedwait returned with value %d\n", t1.tv_sec, ret); + if (!ret) { + break; + } else if ((ret < 0) && (get_errno() != ETIMEDOUT)) { + printf("exiting with errno %d", get_errno()); + goto done; } } return 0; +done: + 
sem_destroy(&g_err_info.infinity_loop_exit); + return ret; } err_status_t error_report_init(void) { + if (g_err_info.fsm == ERRSTATE_INITIALIZED) { + return ERR_SUCCESS; + } pthread_mutex_init(&g_err_info.err_mutex, NULL); g_err_info.fsm = ERRSTATE_INITIALIZED; + g_err_report_fd = open(ERROR_REPORT_DRVPATH, O_RDWR); + if (g_err_report_fd < 0) { + nwerrdbg("Failed to open error report driver file: %d\n", get_errno()); + goto err_case; + } +#ifdef CONFIG_ERROR_REPORT_INFINITE_WAIT + int retval; + sem_init(&g_err_info.infinity_loop_exit, 0, 0); + retval = ioctl(g_err_report_fd, ERIOC_SET_CALLBACK, prv_create_infwait_err_rec); + if (retval < 0) { + nwerr_vdbg("Ioctl failed\n"); + sem_destroy(&g_err_info.infinity_loop_exit); + goto err_case; + } +#endif + return ERR_SUCCESS; +err_case: + pthread_mutex_destroy(&g_err_info.err_mutex); + return ERR_FAIL; +} + +err_status_t error_report_start_infinitywait(void) +{ + int r; + pthread_attr_t attr; + struct sched_param sparam; + sparam.sched_priority = 100; + if ((r = pthread_attr_setschedparam(&attr, &sparam)) != 0) { + nwerrdbg("%s: pthread_attr_setschedparam failed, status=%d\n", __func__, r); + return ERR_FAIL; + } + + if ((r = pthread_attr_setschedpolicy(&attr, SCHED_RR)) != 0) { + nwerrdbg("%s: pthread_attr_setschedpolicy failed, status=%d\n", __func__, r); + return ERR_FAIL; + } + + if ((r = pthread_attr_setstacksize(&attr, 4096)) != 0) { + nwerrdbg("%s: pthread_attr_setstacksize failed, status=%d\n", __func__, r); + return ERR_FAIL; + } + if ((r = pthread_create(&g_err_info.infinity_thread, &attr, (pthread_startroutine_t)prv_check_infinite_wait, NULL)) != 0) { + nwerrdbg("%s: pthread_create failed, status=%d\n", __func__, r); + return ERR_FAIL; + } + nwerr_vdbg("Created infinity thread\n"); return ERR_SUCCESS; } err_status_t error_report_deinit(void) { +#ifdef CONFIG_ERROR_REPORT_INFINITE_WAIT + sem_post(&g_err_info.infinity_loop_exit); + pthread_join(g_err_info.infinity_thread, NULL); +#endif + 
pthread_mutex_lock(&g_err_info.err_mutex); + g_err_info.front = 0; + g_err_info.rear = 0; + g_err_info.q_pending = 0; + g_err_info.fsm = ERRSTATE_UNINITIALIZED; + pthread_mutex_unlock(&g_err_info.err_mutex); pthread_mutex_destroy(&g_err_info.err_mutex); - return ERR_SUCCESS; } -error_data_t *error_report_data_create(int module_id, int error_code, uint32_t pc_value) +error_data_t *error_report_data_create(int err_type, int module_id, int error_code, uint32_t pc_value, uint32_t task_addr) { struct timeval ts; gettimeofday(&ts, NULL); @@ -85,13 +209,27 @@ error_data_t *error_report_data_create(int module_id, int error_code, uint32_t p pthread_mutex_lock(&g_err_info.err_mutex); ret = (error_data_t *) g_error_report + g_err_info.rear; + ret->error_type = ERRTYPE_SERVICE; ret->timestamp.tv_sec = ts.tv_sec; ret->timestamp.tv_usec = ts.tv_usec; ret->module_id = module_id; ret->error_code = error_code; ret->pc_value = pc_value; nwerr_vdbg("pc_value: %08x\n", pc_value); - ret->task_addr = prv_fetch_taskaddr(getpid()); + if (module_id == ERRMOD_INFINITE_WAIT) { + ret->task_addr = task_addr; + } else { + int retval; + thread_entry_t thd; + thd.pid = getpid(); + retval = ioctl(g_err_report_fd, ERIOC_GET_ENTRY, &thd); + if (retval < 0) { + nwerr_vdbg("Ioctl failed\n"); + ret = NULL; + goto done; + } + ret->task_addr = thd.entry; + } nwerr_vdbg("task_addr: %08x\n", ret->task_addr); if (g_err_info.fsm == ERRSTATE_MEMUNDERFLOW) { g_err_info.fsm = ERRSTATE_INITIALIZED; @@ -100,6 +238,7 @@ error_data_t *error_report_data_create(int module_id, int error_code, uint32_t p if (g_err_info.q_pending < CONFIG_ERROR_REPORT_NENTRIES) { g_err_info.q_pending++; } +done: pthread_mutex_unlock(&g_err_info.err_mutex); return ret; } @@ -117,11 +256,13 @@ int error_report_data_read(char *readbuf) } if (readbuf != NULL) { if (!nentries) { + nwerr_vdbg("No entries\n"); g_err_info.fsm = ERRSTATE_MEMUNDERFLOW; } else { while (nentries) { if (nbytes + sizeof(error_data_t) >= ERR_BUFLEN) { g_err_info.fsm = 
ERRSTATE_BUFFER_EXCEEDED; + nwerr_vdbg("Buffer exceeded\n"); goto err_read_done; } memcpy(readbuf + nbytes, (char *)(g_error_report + g_err_info.front), sizeof(error_data_t)); @@ -134,7 +275,7 @@ int error_report_data_read(char *readbuf) } err_read_done: - printf("Report in Hex: "); + printf("Report (%d bytes) in Hex: ", nbytes); for (i = 0; i < nbytes; i++) { printf("%02x ", readbuf[i]); } @@ -150,7 +291,7 @@ int error_report_send(const char *ep, char *readbuf, int readbuf_len) int socket_fd; struct sockaddr_in endpoint; char ip_addr[ERR_IP_ADRR_LEN]; - socket_fd = socket(AF_INET, SOCK_STREAM, 0); + socket_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (socket_fd < CONFIG_NSOCKET_DESCRIPTORS) { return ERR_FAIL; } diff --git a/external/include/error_report/error_report.h b/external/include/error_report/error_report.h index 8a207f17af..727641d89c 100644 --- a/external/include/error_report/error_report.h +++ b/external/include/error_report/error_report.h @@ -37,6 +37,7 @@ enum error_module_id_e { ERRMOD_WPA_SUPPLICANT, ERRMOD_LWIP_CORE, ERRMOD_LWIP_SOCKET, + ERRMOD_INFINITE_WAIT, }; #ifdef CONFIG_WIFIMGR_ERROR_REPORT @@ -62,33 +63,50 @@ typedef enum error_code_wifi_manager_e error_code_wifi_manager_t; #endif typedef struct { - struct timeval timestamp; + uint8_t error_type; + uint8_t module_id; + int16_t error_code; uint32_t pc_value; uint32_t task_addr; - int16_t error_code; - uint16_t module_id; + struct timeval timestamp; } error_data_t; +typedef struct { + uint8_t error_type; + uint8_t ncalls; + uint16_t task_state; + uint32_t backtrace[CONFIG_ERROR_REPORT_BACKTRACE_MAX_DEPTH]; + uint32_t task_addr; + struct timeval timestamp; +} error_infwait_data_t; + typedef enum { ERR_FAIL = -1, ERR_SUCCESS, } err_status_t; +typedef enum { + ERRTYPE_SERVICE, + ERRTYPE_HANGING, +} err_type_t; + /** * @file error_report/error_report.h * @brief Provides APIs for Error Reporting Module */ err_status_t error_report_init(void); +err_status_t 
error_report_start_infinitywait(void); err_status_t error_report_deinit(void); -error_data_t *error_report_data_create(int module_id, int error_code, uint32_t pc_value); +error_data_t *error_report_data_create(int err_type, int module_id, int error_code, uint32_t pc_value, uint32_t task_addr); int error_report_data_read(char *readbuf); int error_report_send(const char *ep, char *readbuf, int readbuf_len); #define ERR_DATA_CREATE(module_id, reason_code) \ do { \ uint32_t pc_value; \ - __asm volatile ("mov %[result], r15":[result] "=r" (pc_value)); \ - if (error_report_data_create(module_id, reason_code, pc_value) == NULL) {\ + __asm volatile ("mov %[result], r15":[result] "=r" (pc_value));\ + printf("Inside ERR_DATA_CREATE\n"); \ + if (error_report_data_create(ERRTYPE_SERVICE, module_id, reason_code, pc_value, 0) == NULL) {\ printf("Failed to create error log\n"); \ } \ } while(0) diff --git a/os/arch/arm/src/common/up_initialize.c b/os/arch/arm/src/common/up_initialize.c index a078c95cd3..6ef9f43eae 100644 --- a/os/arch/arm/src/common/up_initialize.c +++ b/os/arch/arm/src/common/up_initialize.c @@ -257,10 +257,6 @@ void up_initialize(void) (void)telnet_initialize(); #endif -#ifdef CONFIG_ERROR_REPORT - /* Initialize Error Reporting for network */ - error_report_init(); -#endif /* Initialize the network */ up_netinitialize(); diff --git a/os/drivers/Makefile b/os/drivers/Makefile index 36a65f6dbb..efa5d52b05 100644 --- a/os/drivers/Makefile +++ b/os/drivers/Makefile @@ -73,6 +73,7 @@ endif include analog$(DELIM)Make.defs include audio$(DELIM)Make.defs include bch$(DELIM)Make.defs +include error_report$(DELIM)Make.defs include fota$(DELIM)Make.defs include gpio$(DELIM)Make.defs include i2c$(DELIM)Make.defs diff --git a/os/drivers/error_report/Make.defs b/os/drivers/error_report/Make.defs new file mode 100644 index 0000000000..78e56ba4ad --- /dev/null +++ b/os/drivers/error_report/Make.defs @@ -0,0 +1,29 @@ 
+########################################################################## +# +# Copyright 2018 Samsung Electronics All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +# +########################################################################## +# Include error report drivers + +ifeq ($(CONFIG_ERROR_REPORT),y) + +CSRCS += error_report_drv.c + +# Include error report driver support + +DEPPATH += --dep-path error_report +VPATH += :error_report + +endif diff --git a/os/drivers/error_report/error_report_drv.c b/os/drivers/error_report/error_report_drv.c new file mode 100644 index 0000000000..a5503bcdcc --- /dev/null +++ b/os/drivers/error_report/error_report_drv.c @@ -0,0 +1,302 @@ +/**************************************************************************** + * + * Copyright 2018 Samsung Electronics All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific + * language governing permissions and limitations under the License. 
+ * + ****************************************************************************/ +/**************************************************************************** + * Included Files + ****************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/**************************************************************************** + * Private Function Prototypes + ****************************************************************************/ + +static int error_report_ioctl(FAR struct file *filep, int cmd, unsigned long arg); +static ssize_t err_report_read(FAR struct file *filep, FAR char *buffer, size_t len); +static ssize_t err_report_write(FAR struct file *filep, FAR const char *buffer, size_t len); +/**************************************************************************** + * Private Data + ****************************************************************************/ +#define INFINITE_CHECK_HIGH_THRESHOLD 100 +static const struct file_operations err_report_fops = { + 0, /* open */ + 0, /* close */ + err_report_read, /* read */ + err_report_write, /* write */ + 0, /* seek */ + error_report_ioctl /* ioctl */ +#ifndef CONFIG_DISABLE_POLL + , 0 /* poll */ +#endif +}; + +static create_infwait_err_rec g_create_err_rec; +static infinity_waitq_t g_thd_waitq[CONFIG_MAX_TASKS]; +#ifdef CONFIG_ERROR_REPORT_INFINITE_WAIT +static const char *g_exceptional_waits[] = {"tash", "lpwork", "hpwork", "LWIP_TCP/IP", "Network Error Reporting", "WPA Supplicant", "WLAN Driver mxmgmt", "WPA Ctrl Iface FIFO", "Wi-Fi API monitor"}; +#endif + +/**************************************************************************** + * Private Functions + ****************************************************************************/ +#ifdef CONFIG_ERROR_REPORT_INFINITE_WAIT +#define INSERT_INFINITE_WAITQ(pos, tcb, cpu) \ + do { \ + g_thd_waitq[pos].pid = tcb->pid; \ + 
g_thd_waitq[pos].task_state = tcb->task_state;\ + g_thd_waitq[pos].count = 1; \ + g_thd_waitq[pos].active_cpu = cpu.active;\ + nwerr_vdbg("Inserted pid %d at pos %d\n", tcb->pid, pos);\ + } while (0) + +#define DELETE_INFINITE_WAITQ(pos) \ + do { \ + g_thd_waitq[pos].pid = 0; \ + g_thd_waitq[pos].count = 0; \ + g_thd_waitq[pos].task_state = 0; \ + g_thd_waitq[pos].active_cpu = 0; \ + nwerr_vdbg("Reset pos %d\n", pos); \ + } while (0) +#endif + +#define RETURN_ADDR_FROM_TCB(retval, tcbptr) \ + do { \ + if (tcbptr != NULL) { \ + entry_t e = tcbptr->entry; \ + if ((tcbptr->flags & TCB_FLAG_TTYPE_MASK) == TCB_FLAG_TTYPE_PTHREAD) {\ + retval = (unsigned long)e.pthread; \ + } else { \ + retval = (unsigned long)e.main; \ + } \ + } else { \ + retval = 0; \ + } \ + } while (0) + +static int prv_is_reserved(const char *taskname) +{ + int i; + if (taskname == NULL) { + return -1; + } + for (i = 0; i < sizeof(g_exceptional_waits) / sizeof(g_exceptional_waits[0]); i++) { + if (!strncmp(taskname, g_exceptional_waits[i], strlen(taskname))) { + return 1; + } + } + return 0; +} + +static ssize_t err_report_read(FAR struct file *filep, FAR char *buffer, size_t len) +{ + return 0; +} + +static ssize_t err_report_write(FAR struct file *filep, FAR const char *buffer, size_t len) +{ + return 0; +} + +static unsigned long prv_fetch_taskaddr(int pid) +{ + unsigned long retval; + struct tcb_s *tcbptr = sched_gettcb(pid); + RETURN_ADDR_FROM_TCB(retval, tcbptr); + return retval; +} + +static void prv_tcb_handler(FAR struct tcb_s *tcb, void *args) +{ + int pos = 0; + if ((tcb->task_state >= TSTATE_WAIT_SEM) && (tcb->task_state < NUM_TASK_STATES)) { + int ret; + struct cpuload_s cpu; + ret = clock_cpuload(tcb->pid, 0, &cpu); + if (ret != OK) { + return; + } + /* Hash pid over the waitq range */ + pos = tcb->pid % CONFIG_MAX_TASKS; + if ((g_thd_waitq[pos].pid == tcb->pid)) { + if (g_thd_waitq[pos].task_state == tcb->task_state) { + if (g_thd_waitq[pos].active_cpu == cpu.active) { + int 
threshold; + g_thd_waitq[pos].count++; + if (prv_is_reserved(tcb->name) == 1) { + threshold = INFINITE_CHECK_HIGH_THRESHOLD; + } else { + threshold = CONFIG_ERROR_REPORT_INFINITE_CHECK_THRESHOLD; + } + + nwerr_vdbg("Inc wait count for pid %d\n", tcb->pid); + if (g_thd_waitq[pos].count >= threshold) { + // Add code for error report addition + uint32_t task_addr; + RETURN_ADDR_FROM_TCB(task_addr, tcb); + nwerr_vdbg("pid %d blocked\n", tcb->pid); + g_create_err_rec(tcb->pid, tcb->task_state, task_addr); + /* Reset the count, indicating that is free for insert */ + DELETE_INFINITE_WAITQ(pos); + } + } else { + /* The thread was running in between, and blocked again */ + INSERT_INFINITE_WAITQ(pos, tcb, cpu); + } + } else { + /* Ideally does not happen, but we account for it anyway */ + INSERT_INFINITE_WAITQ(pos, tcb, cpu); + } + } else if (!g_thd_waitq[pos].pid) { + /* Add new entry */ + INSERT_INFINITE_WAITQ(pos, tcb, cpu); + } else { + /* Collision resolution, find the next empty slot */ + int i; + for (i = pos+1; i != pos; i = (i + 1) % CONFIG_MAX_TASKS) { + if (g_thd_waitq[i].pid == 0) { + /* Found an empty slot, insert values here */ + INSERT_INFINITE_WAITQ(i, tcb, cpu); + break; + } + } + if (i == pos) { + nwerrdbg("Cannot find a slot to insert values\n"); + } + } + } else { + /* Maybe the pid was blocked during the + * last sampling instance, and + * is active again. Reset the data + * in this case. 
+ */ + /* Hash pid over the waitq range */ + pos = tcb->pid % CONFIG_MAX_TASKS; + if ((g_thd_waitq[pos].pid == tcb->pid)) { + /* Reset the entry, indicating that the slot is free for insert */ + DELETE_INFINITE_WAITQ(pos); + } else if (g_thd_waitq[pos].pid) { + /* Collision resolution, find the slot using linear search */ + int i; + for (i = pos+1; i != pos; i = (i + 1) % CONFIG_MAX_TASKS) { + if (g_thd_waitq[i].pid == tcb->pid) { + /* Found the slot holding this pid, reset it */ + DELETE_INFINITE_WAITQ(i); + break; + } + } + } + } +} + + + +/************************************************************************************ + * Name: error_report_ioctl + * + * Description: The ioctl method for error reporting. + * + ************************************************************************************/ +static int error_report_ioctl(FAR struct file *filep, int cmd, unsigned long arg) +{ + int ret = -EINVAL; + nwerr_vdbg("cmd: %d arg: %ld\n", cmd, arg); + /* Handle built-in ioctl commands */ + switch (cmd) { + case ERIOC_GET_ENTRY: + { + thread_entry_t *thd = (thread_entry_t *) ((uintptr_t)arg); + thd->entry = prv_fetch_taskaddr(thd->pid); + ret = OK; + } + break; + case ERIOC_CHECK_INFWAIT: + sched_foreach(prv_tcb_handler, NULL); + ret = OK; + break; + case ERIOC_GET_BACKTRACE: + { + infinty_waitdata_t *waitdata = (infinty_waitdata_t *) ((uintptr_t)arg); + struct tcb_s *tcb = sched_gettcb(waitdata->pid); + if (tcb == NULL) { + break; + } + waitdata->ncalls = 0; + /* Read the functions from the callstack into waitdata structure */ + uint32_t *ptr; + ptr = (uint32_t *)tcb->xcp.regs[13]; + while ((void *)ptr < tcb->adj_stack_ptr && (waitdata->ncalls < CONFIG_ERROR_REPORT_BACKTRACE_MAX_DEPTH)) { + if (*ptr >= 0x04000000 && *ptr <= 0x04800000) { + waitdata->backtrace[waitdata->ncalls] = *ptr - 4; + nwerr_vdbg("Stack: 0x%08x\n", *ptr - 4); + waitdata->ncalls++; + } + ptr++; + } + /* Fill out the remaining slots in waitdata with 0 */ + while (waitdata->ncalls < 
CONFIG_ERROR_REPORT_BACKTRACE_MAX_DEPTH) { + waitdata->backtrace[waitdata->ncalls] = 0; + waitdata->ncalls++; + } + waitdata->task_state = tcb->task_state; + RETURN_ADDR_FROM_TCB(waitdata->entry, tcb); + ret = OK; + } + break; + case ERIOC_SET_CALLBACK: + { + create_infwait_err_rec cb = (create_infwait_err_rec) ((uintptr_t)arg); + g_create_err_rec = cb; + ret = OK; + } + break; + default: + nwerrdbg("Unrecognized cmd: %d arg: %ld\n", cmd, arg); + break; + } + return ret; +} + +/**************************************************************************** + * Public Functions + ****************************************************************************/ + +/**************************************************************************** + * Name: error_report_drv_register + * + * Description: + * Register task management path, ERROR_REPORT_DRVPATH. This function is + * called within the error reporting module, during initialization. + * The callback is implemented in the error report module, and passed as + * an argument to this function. when the error report driver infers the + * presence of long waiting threads, it signals the error reporting module + * using the registered callback. + * @param[in] callback function with signature type create_infwait_err_rec. + * Used for reporting infinitely waiting threads. + ****************************************************************************/ + +void error_report_drv_register(void) +{ + (void)register_driver(ERROR_REPORT_DRVPATH, &err_report_fops, 0666, NULL); +} diff --git a/os/include/tinyara/error_report_internal.h b/os/include/tinyara/error_report_internal.h new file mode 100644 index 0000000000..19b6853910 --- /dev/null +++ b/os/include/tinyara/error_report_internal.h @@ -0,0 +1,76 @@ +/**************************************************************************** + * + * Copyright 2018 Samsung Electronics All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific + * language governing permissions and limitations under the License. + * + ****************************************************************************/ + +#ifndef __INCLUDE_TINYARA_ERROR_REPORT_INTERNAL_H +#define __INCLUDE_TINYARA_ERROR_REPORT_INTERNAL_H + +/* This file will be used to provide definitions to support + * error report framework + */ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include +#include +#include + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ +#define ERROR_REPORT_DRVPATH "/dev/errreport" +typedef struct { + pid_t pid; + unsigned long entry; +} thread_entry_t; + +#ifdef CONFIG_ERROR_REPORT_INFINITE_WAIT +typedef struct { + pid_t pid; + uint8_t task_state; + uint8_t count; + uint32_t active_cpu; +} infinity_waitq_t; + +typedef struct { + pid_t pid; + uint16_t ncalls; + uint8_t task_state; + unsigned long entry; + uint32_t backtrace[CONFIG_ERROR_REPORT_BACKTRACE_MAX_DEPTH]; +} infinty_waitdata_t; +typedef void (*create_infwait_err_rec)(int pid, int state, unsigned long task_addr); +#endif + +void error_report_drv_register(void); + +#ifdef __cplusplus +#define EXTERN extern "C" +extern "C" { +#else +#define EXTERN extern +#endif + +#undef EXTERN +#ifdef __cplusplus +} +#endif + 
+#endif /* __INCLUDE_TINYARA_ERROR_REPORT_INTERNAL_H */ + diff --git a/os/include/tinyara/fs/ioctl.h b/os/include/tinyara/fs/ioctl.h index a67746fb5d..b5e3d83066 100644 --- a/os/include/tinyara/fs/ioctl.h +++ b/os/include/tinyara/fs/ioctl.h @@ -94,6 +94,7 @@ #define _FOTABASE (0x1900) /* FOTA ioctl commands */ #define _GPIOBASE (0x2000) /* GPIO ioctl commands */ #define _TMBASE (0x2100) /* Task Management ioctl commands */ +#define _ERBASE (0x2200) /* Error Report ioctl commands */ #define _TESTIOCBASE (0xfe00) /* KERNEL TEST DRV module ioctl commands */ /* boardctl() commands share the same number space */ @@ -369,6 +370,19 @@ #define TMIOC_PTHREAD_PARENT _TMIOC(0x0008) #endif +/* TinyAra Error Report driver ioctl definitions ************************/ +#ifdef CONFIG_ERROR_REPORT +#define _ERIOCVALID(c) (_IOC_TYPE(c) == _ERBASE) +#define _ERIOC(nr) _IOC(_ERBASE, nr) + +#define ERIOC_GET_ENTRY _ERIOC(0x0001) +#define ERIOC_CHECK_INFWAIT _ERIOC(0x0002) +#define ERIOC_GET_BACKTRACE _ERIOC(0x0004) +#ifdef CONFIG_ERROR_REPORT_INFINITE_WAIT +#define ERIOC_SET_CALLBACK _ERIOC(0x0005) +#endif +#endif + /**************************************************************************** * Public Type Definitions ****************************************************************************/ diff --git a/os/kernel/init/os_bringup.c b/os/kernel/init/os_bringup.c index 1c6196ee8f..9462c37276 100644 --- a/os/kernel/init/os_bringup.c +++ b/os/kernel/init/os_bringup.c @@ -80,6 +80,9 @@ #ifdef CONFIG_PAGING #include "paging/paging.h" #endif +#ifdef CONFIG_ERROR_REPORT +#include +#endif /**************************************************************************** * Pre-processor Definitions @@ -276,6 +279,10 @@ static inline void os_do_appstart(void) } #endif +#ifdef CONFIG_ERROR_REPORT + /* Starting error reporting module */ + error_report_drv_register(); +#endif svdbg("Starting application main thread\n"); #ifdef CONFIG_BUILD_PROTECTED