分享
三行代码  ›  专栏  ›  技术社区  ›  Medwards

R对行中的多个条件使用any()-更复杂的版本

  •  1
  • Medwards  · 技术社区  · 1 周前

    R using any() on multiple conditions within row

    但是,我不确定是否可以将它扩展到我的第二个需要,我试图确定From_ID中的ID是否在该组中较早的to_ID行中。

    library(dplyr);library(anytime)
    
    
    Tag <- c('V1','V1','V1','V1','V1','V1','V2','V2','V2','V3','V3','V3','V3','V4','V4','V4')
    From_ID <- c('BL342','SD234','FR212','PX123','KJ214','BL342',  'FR231','BL231','CV231',   'KJ875','PX239','TR543','FR342',    'DS329','FR321','DF321')
    To_ID <- c('FR212','BL342','SD234','FG342','BL342','KJ214',  'BL231','FR231','KJ123',   'FG432','KJ123','TR998','PX239',    'HG734','DF321','FR321')
    
    Date <-  sample(seq(anydate('2017-01-01'), anydate('2020-01-01'), by="day"), 16)
    
    df <- data.frame(Tag, From_ID, To_ID, Date) %>% group_by(Tag) %>% arrange(Tag, desc(Date)) %>% mutate(Rank = row_number()) 
    
    

    如果我不关心我之前做过的行位置:

    library(stringr)
    #pivot wider, unite, str_extract to get a list of words, then detect in that list using case_when 
    wide <- df %>% 
            pivot_wider(id_cols = Tag, names_from = Date, values_from  = To_ID) %>%
            unite(d, contains("-"), sep =",", na.rm=T) %>% 
            mutate(Extract = str_extract_all(d, "\\[a-z]{2}[0-9]{3}")) %>% 
            full_join(df) %>% 
            mutate(SY_Del = case_when(b == 'Farm' &
                                      str_detect(Extract, From_ID) == T ~ T,
                                  TRUE ~ F)
    
    

    当From_ID包含在任何日期早于被评估的行中时,即。 expected output

       Tag From_ID To_ID       Date Rank  Flag
    1   V1   FR212 SD234 2019-08-24    1  TRUE
    2   V1   BL342 KJ214 2019-05-05    2  TRUE
    3   V1   PX123 FG342 2019-04-22    3 FALSE
    4   V1   KJ214 BL342 2019-01-01    4 FALSE
    5   V1   SD234 BL342 2018-09-13    5 FALSE
    6   V1   BL342 FR212 2018-04-30    6 FALSE
    7   V2   FR231 BL231 2019-09-16    1  TRUE
    8   V2   CV231 KJ123 2018-01-07    2 FALSE
    9   V2   BL231 FR231 2017-01-11    3 FALSE
    10  V3   KJ875 FG432 2019-11-14    1 FALSE
    11  V3   TR543 TR998 2019-10-26    2 FALSE
    12  V3   FR342 PX239 2019-07-02    3 FALSE
    13  V3   PX239 KJ123 2017-07-15    4 FALSE
    14  V4   DS329 HG734 2019-01-30    1 FALSE
    15  V4   DF321 FR321 2017-05-06    2  TRUE
    16  V4   FR321 DF321 2017-03-20    3 FALSE
    
    
    1 回复  |  直到 1 周前
        1
  •  1
  •   akrun    1 周前

    将“日期”转换为 Date 班级( dmy -从 lubridate ), arrange 按“Tag”、“Date”分组的数据按“Tag”分组,通过在 row_number ,检查元素“From_ID”是否为 %in% 从第一行到那一行的“收件人ID”, ungroup 安排 基于“Tag”、“Rank”列按原始顺序返回的数据集

    library(dplyr)
    library(purrr)
    library(lubridate) 
    df %>%
         # // convert to Date class
         mutate(Date = dmy(Date)) %>% 
         # // order the dataset by Tag, Date
         arrange(Tag, Date) %>% 
         # // grouped by Tag
         group_by(Tag) %>% 
         # // loop over the sequence of rows
         mutate(Flag2 = map_lgl(row_number(),
               # // check whether the 'From_ID' of that row is in 
               # // any of the previous row elements of 'To_ID'
               ~ From_ID[.x] %in% To_ID[1:(.x)])) %>% 
         ungroup %>% 
         # // order back to original
         arrange(Tag, Rank)
    # A tibble: 16 x 7
    #   Tag   From_ID To_ID Date        Rank Flag  Flag2
    #   <chr> <chr>   <chr> <date>     <int> <lgl> <lgl>
    # 1 V1    FR212   SD234 2019-08-24     1 TRUE  TRUE 
    # 2 V1    BL342   KJ214 2019-05-05     2 TRUE  TRUE 
    # 3 V1    PX123   FG342 2019-04-22     3 FALSE FALSE
    # 4 V1    KJ214   BL342 2019-01-01     4 FALSE FALSE
    # 5 V1    SD234   BL342 2018-09-13     5 FALSE FALSE
    # 6 V1    BL342   FR212 2018-04-30     6 FALSE FALSE
    # 7 V2    FR231   BL231 2019-09-16     1 TRUE  TRUE 
    # 8 V2    CV231   KJ123 2018-01-07     2 FALSE FALSE
    # 9 V2    BL231   FR231 2017-01-11     3 FALSE FALSE
    #10 V3    KJ875   FG432 2019-11-14     1 FALSE FALSE
    #11 V3    TR543   TR998 2019-10-26     2 FALSE FALSE
    #12 V3    FR342   PX239 2019-07-02     3 FALSE FALSE
    #13 V3    PX239   KJ123 2017-07-15     4 FALSE FALSE
    #14 V4    DS329   HG734 2019-01-30     1 FALSE FALSE
    #15 V4    DF321   FR321 2017-05-06     2 TRUE  TRUE 
    #16 V4    FR321   DF321 2017-03-20     3 FALSE FALSE
    

    数据

    df <- structure(list(Tag = c("V1", "V1", "V1", "V1", "V1", "V1", "V2", 
    "V2", "V2", "V3", "V3", "V3", "V3", "V4", "V4", "V4"), From_ID = c("FR212", 
    "BL342", "PX123", "KJ214", "SD234", "BL342", "FR231", "CV231", 
    "BL231", "KJ875", "TR543", "FR342", "PX239", "DS329", "DF321", 
    "FR321"), To_ID = c("SD234", "KJ214", "FG342", "BL342", "BL342", 
    "FR212", "BL231", "KJ123", "FR231", "FG432", "TR998", "PX239", 
    "KJ123", "HG734", "FR321", "DF321"), Date = c("24/08/2019", "5/05/2019", 
    "22/04/2019", "1/01/2019", "13/09/2018", "30/04/2018", "16/09/2019", 
    "7/01/2018", "11/01/2017", "14/11/2019", "26/10/2019", "2/07/2019", 
    "15/07/2017", "30/01/2019", "6/05/2017", "20/03/2017"), Rank = c(1L, 
    2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L), 
        Flag = c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
        FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE)),
        class = "data.frame", row.names = c("1", 
    "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
    "14", "15", "16"))