像这样的代码

def IsEven(n):
    if n%2==0:
        return "Is even"
    else:
        return "Is odd"

可以转换为如下函数

def isEven(n):
    return ["Is even","Is odd"][n%2==0]

问题是这样的代码:

def intervalsToOutput(n):
    intervals=[(x1,x2), (x3,x4), (x5,x6), ... (xn,xn+1)]
    if x1<=n<=x2:
        return "in first interval"
    elif x3<=n<=x4:
        return "in second interval"
    elif x5<=n<=x6:
        return "in third interval"
    ...
    elif xn<=n<=xn+1:
        return "in last interval"

... can be replaced efficiently with a function like this:
(assuming the intervals (xi,xi+1) are not overlapping, and are sorted by xi)

def intervalsToOutput(n):
    intervals=[(x1,x2), (x3,x4), (x5,x6), ... (xn,xn+1)]
    answer=["in first interval","in second interval","in third interval",...,"in last interval"]
    return answer[index of (n in Interval)]
    

我做的最好的(looking for speed)是

def intervalsToOutput(n):
    intervals=[(x1,x2), (x3,x4), (x5,x6), ... (xn,xn+1)]
    answer=["in first interval","in second interval","in third interval",...,"in last interval"]
    import bisect as bisect
    return answer[bisect.bisect_left(intervals, (n, )) - 1]# -1 because it is a zero based list

What bisect_left does is to find the position of n in intervals (in O(log(n)) time) by comparing (n,) with (xn,xn+1) But the purpose of bisect is to find the insertion place, so it fails for any n which is x_{i+1}<n<=x_{j}

... (x_i, x_{i+1}), (x_j, x_{j+1}) ...
[x_i______x_i+1] fails on this interval x_j [________x_j+1]


EDIT:

#generate intervals and answers
import random
min=0
max=20
numIntervals=6

def Intervals(min,max,n):
    randInts = random.sample(range(min, max), n * 2)
    randInts.sort()
    return [(x1,x2) for x1,x2 in zip(randInts[::2], randInts[1::2])]

intervals=Intervals(min,max,numIntervals)
answer=[f"In {n}th interval" for n in range(len(intervals))]

#Implement solution

import intervaltree

tree = intervaltree.IntervalTree() 
[tree.addi(i[0],i[1],a) for i,a in zip(intervals,answer)]

def intervalsToOutput(x):
    answer=tree.at(x)
    if len(answer)>0:
        return answer.pop().data
    return "value not found"

print(intervals)
[print(f"{x} is ",intervalsToOutput(x)) for x in random.sample(range(min,max),numIntervals)]

推荐答案

我会这样做:

intervals=[(1,2),(6,8),(22,45),(101,110)]

tgt=27
idx=next(i for i,t in enumerate(intervals) if t[0]<=tgt<=t[1])

>>> idx
2

如果要捕获未找到的条件,请使用try / except:

try:
   idx=next(i for i,t in enumerate(intervals) if t[0]<=tgt<=t[1])
except StopIteration:
    # not found

或使用默认形式next:

idx=next((i for i,t in enumerate(intervals) if t[0]<=tgt<=t[1]), None)

公平地说,这种方法很慢.这是一条非常快的路.

您可以获取bisect的源并使用自定义逻辑进行修改,以判断列表中的每个元组,查看是否满足t[0] <= x <= t[1]的条件,如果无法满足该条件,则返回None.

给定此形式的元组列表:

[(0, 3), (5, 10), (13, 17)] ... [(69735, 69739), (69742, 69746), (69749, 69752)]

您可以使用自定义对分搜索来查找满足所述条件的元组:

def bisect_search(a, x, lo=0, hi=None):
    if lo < 0:
        raise ValueError('lo must be non-negative')
    if hi is None or hi>len(a):
        hi = len(a)

    while lo < hi:
        mid = (lo + hi) // 2
        f_mid=-1 if x<a[mid][0] else 1 if x>a[mid][-1] else 0
        if f_mid < 0:
            hi = mid
        elif f_mid > 0:
            lo = mid + 1
        else:
            return mid
        
    return None

以下是这两种方法的基准:

import random 
import time 

def gen_tuples(cnt, span=(1,5)):
    li=[]
    
    lo,hi=0,random.randint(*span)
    
    for _ in range(cnt):
        li.append((lo,hi))
        lo=li[-1][-1]+random.randint(*span)
        hi=lo+random.randint(span[0]+2,span[-1])
        
    rando=random.randint(cnt//2,cnt-1)
    to_find=li[rando][random.randint(0,len(li[rando])-1)]   
    return (rando,to_find,li)



def next_(a, x):
    return next((i for i,t in enumerate(a) if t[0] <= x <=t [1]), None)
    
def bisect_search(a, x, lo=0, hi=None):
    if lo < 0:
        raise ValueError('lo must be non-negative')
    if hi is None or hi>len(a):
        hi = len(a)

    while lo < hi:
        mid = (lo + hi) // 2
        f_mid=-1 if x<a[mid][0] else 1 if x>a[mid][-1] else 0
        if f_mid < 0:
            hi = mid
        elif f_mid > 0:
            lo = mid + 1
        else:
            return mid
        
    return None


def cmpthese(funcs, args=(), cnt=1000, rate=True, micro=True):
    """Generate a Perl style function benchmark"""                   
    def pprint_table(table):
        """Perl style table output"""
        def format_field(field, fmt='{:,.0f}'):
            if type(field) is str: return field
            if type(field) is tuple: return field[1].format(field[0])
            return fmt.format(field)     
        
        def get_max_col_w(table, index):
            return max([len(format_field(row[index])) for row in table])         
        
        col_paddings=[get_max_col_w(table, i) for i in range(len(table[0]))]
        for i,row in enumerate(table):
            # left col
            row_tab=[row[0].ljust(col_paddings[0])]
            # rest of the cols
            row_tab+=[format_field(row[j]).rjust(col_paddings[j]) for j in range(1,len(row))]
            print(' '.join(row_tab))                
            
    results={}
    for i in range(cnt):
        for f in funcs:
            start=time.perf_counter_ns()
            f(*args)
            stop=time.perf_counter_ns()
            results.setdefault(f.__name__, []).append(stop-start)
    results={k:float(sum(v))/len(v) for k,v in results.items()}     
    fastest=sorted(results,key=results.get, reverse=True)
    table=[['']]
    if rate: table[0].append('rate/sec')
    if micro: table[0].append('\u03bcsec/pass')
    table[0].extend(fastest)
    for e in fastest:
        tmp=[e]
        if rate:
            tmp.append('{:,}'.format(int(round(float(cnt)*1000000.0/results[e]))))
            
        if micro:
            tmp.append('{:,.1f}'.format(results[e]/float(cnt)))
            
        for x in fastest:
            if x==e: tmp.append('--')
            else: tmp.append('{:.1%}'.format((results[x]-results[e])/results[e]))
        table.append(tmp) 
        
    pprint_table(table)                    
    
    
    
if __name__=='__main__':
    import sys
    print(sys.version)
    cases=(
        ('small, found', True, 100),
        ('small, not found', False, 100),
        ('large, found', True, 10000),
        ('large, not found', False, 10000)
    )
    for txt, f, cnt in cases:
        rando,to_find,li=gen_tuples(cnt)
        tgt=to_find if f else -1
        args=(li, tgt)
        f1,f2=bisect_search(*args), next_(*args)
        print(f'\n{txt}: {f1}, {f2}')
        cmpthese([next_,bisect_search], args)  

打印:

3.10.2 (main, Feb  2 2022, 06:19:27) [Clang 13.0.0 (clang-1300.0.29.3)]

small, found: 84, 84
              rate/sec μsec/pass  next_ bisect_search
next_          121,533       8.2     --        -82.0%
bisect_search  675,074       1.5 455.5%            --

small, not found: None, None
              rate/sec μsec/pass  next_ bisect_search
next_          155,885       6.4     --        -81.6%
bisect_search  847,744       1.2 443.8%            --

large, found: 8824, 8824
              rate/sec μsec/pass    next_ bisect_search
next_            1,232     811.9       --        -99.6%
bisect_search  299,677       3.3 24230.9%            --

large, not found: None, None
              rate/sec μsec/pass    next_ bisect_search
next_            1,524     656.0       --        -99.6%
bisect_search  340,112       2.9 22211.5%            --

这表明bisect_search方法的速度要快得多.

注意:try 使用Python 3.10中的key对分来做同样的事情很有诱惑力.这是不可能的,因为目标元组范围的上限未知,并且(x,)的方法仅与目标元组范围的下限进行比较.

Python-3.x相关问答推荐

如何从拼图分区数据集中读取数据到Polar

循环遍历数据框以提取特定值

文件名中的文件打开和撇号

Pandas 根据条件增加Dataframe列

给定panda代码的分组和百分比分布pyspark等价

命名空间前缀无效

通过匹配第一列的行值,逐个单元格地添加两个Pandas 数据框中的浮点数

如何在pyspark的列中按连续1分组并保持具有特定大小的组

使用正则表达式提取字符串之间的文本

Pygame 错误地渲染等距图像

有效地缩短列表,直到第一次和最后一次出现不同于 None 的值

为什么 List 不能包含多种类型?

错误:预期语句,发现 py:Dedent

日志(log)模块不适用于 Python3

为什么 Django South 1.0 使用 iteritems()?

pandas 中 df.reindex() 和 df.set_index() 方法的区别

如何将 SimpleGUI 与 Python 2.7 和 3.0 shell 集成

如何使用 python 库连接到 poloniex.com websocket api

清除 PyCharm 运行窗口

为什么某些代码在 Python2 中是确定性的,而在 Python 3 中是非确定性的?