-
-
Notifications
You must be signed in to change notification settings - Fork 19.2k
Description
Code Sample, a copy-pastable example if possible
In [2]: i = pd.Series(list('abcdefghijk'*10**5))
In [3]: alt = [-1, 'AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GR']*6
In [4]: res = i[:10**6].isin(alt)
In [5]: res = i[:10**6+1].isin(alt)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-f3e21d855671> in <module>()
----> 1 res = i[:10**6+1].isin(alt)
/home/pietro/nobackup/repo/pandas/pandas/core/series.py in isin(self, values)
2458
2459 """
-> 2460 result = algorithms.isin(_values_from_object(self), values)
2461 return self._constructor(result, index=self.index).__finalize__(self)
2462
/home/pietro/nobackup/repo/pandas/pandas/core/algorithms.py in isin(comps, values)
421 comps = comps.astype(object)
422
--> 423 return f(comps, values)
424
425
/home/pietro/nobackup/repo/pandas/pandas/core/algorithms.py in <lambda>(x, y)
401 f = lambda x, y: htable.ismember_object(x, values)
402 if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
--> 403 f = lambda x, y: np.in1d(x, y)
404 elif is_integer_dtype(comps):
405 try:
/usr/lib/python3/dist-packages/numpy/lib/arraysetops.py in in1d(ar1, ar2, assume_unique, invert)
399 if not assume_unique:
400 ar1, rev_idx = np.unique(ar1, return_inverse=True)
--> 401 ar2 = np.unique(ar2)
402
403 ar = np.concatenate((ar1, ar2))
/usr/lib/python3/dist-packages/numpy/lib/arraysetops.py in unique(ar, return_index, return_inverse, return_counts)
212 aux = ar[perm]
213 else:
--> 214 ar.sort()
215 aux = ar
216 flag = np.concatenate(([True], aux[1:] != aux[:-1]))
TypeError: unorderable types: str() > int()
Problem description
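Although the length of alt also matters in some way, the failure starts at the same point even with a completely different dataset: as soon as the Series (or even Index) being searched has more than 1M elements. This matches the `len(comps) > 1000000` branch shown in the traceback, where `isin` switches to `np.in1d`; that function sorts the values and therefore fails when they mix str and int. By the way, triggering the error takes much more time than the successful operation.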
Might be related to #13432, although that one is unrelated to the length of the Series.
Expected Output
Like In [4], just with one more element.
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.5.3.final.0
python-bits: 64
OS: Linux
OS-release: 4.7.0-1-amd64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: it_IT.utf8
LOCALE: it_IT.UTF-8
pandas: 0.19.0+783.gcd35d22a0
pytest: 3.0.6
pip: 9.0.1
setuptools: 33.1.1
Cython: 0.25.2
numpy: 1.12.0
scipy: 0.18.1
xarray: 0.9.1
IPython: 5.1.0.dev
sphinx: 1.4.9
patsy: 0.3.0-dev
dateutil: 2.5.3
pytz: 2016.7
blosc: None
bottleneck: 1.2.0
tables: 3.3.0
numexpr: 2.6.1
feather: 0.3.1
matplotlib: 2.0.0
openpyxl: 2.3.0
xlrd: 1.0.0
xlwt: 1.1.2
xlsxwriter: 0.9.6
lxml: 3.7.1
bs4: 4.5.3
html5lib: 0.999999999
sqlalchemy: 1.0.15
pymysql: None
psycopg2: None
jinja2: 2.8
s3fs: None
pandas_gbq: None
pandas_datareader: 0.2.1