Week 2 Notebook fails with Pandas 2.0.3

I’m running these notebooks locally to get some reps in with Jupyter Notebook and the python environment, and I noticed that step 5 (“Select a subset of turbines”) failed in my local environment which is running pandas 2.0.3

The course’s environment is running 1.5.1 and I verified that step 5 worked correctly when I downgraded my local environment to 1.5.1

My skill at debugging third-party Python libraries is pretty weak, but the step appears to fail because pandas is trying to parse a long run of colon-separated integers from a string into a float. I can provide more of the error output if anyone is interested.

1 Like

You should post more of the error output for the mentors; if they can, they will help you!

1 Like

Hey, @Ed_Norris
It’s great to observe your initiative in working with the notebooks locally. However, I’d like to highlight a potential concern that could arise over time. The functions defined within the Coursera notebooks might not function optimally in all cases, particularly the grader functions.
Furthermore, attaching a screenshot of the error would make it easier to understand what went wrong.

Regards,
Abhinav

1 Like

Here’s the error from “5. Select a subset of turbines” in C2_W2_Lab_1_Wind_Energy_Explore.ipynb. The “snip” is about 175K bytes of colon-separated integers.
And that’s a good point about grading - I’ll make sure I’m only submitting work from the Coursera servers.

# Keep the data for a 10-turbine wind farm

top_turbines = utils.top_n_turbines(raw_data, 10)


NotImplementedError Traceback (most recent call last)
File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/groupby.py:1490, in GroupBy._cython_agg_general.<locals>.array_func(values)
1489 try:
→ 1490 result = self.grouper._cython_operation(
1491 “aggregate”,
1492 values,
1493 how,
1494 axis=data.ndim - 1,
1495 min_count=min_count,
1496 **kwargs,
1497 )
1498 except NotImplementedError:
1499 # generally if we have numeric_only=False
1500 # and non-applicable functions
1501 # try to python agg
1502 # TODO: shouldn’t min_count matter?

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/ops.py:959, in BaseGrouper._cython_operation(self, kind, values, how, axis, min_count, **kwargs)
958 ngroups = self.ngroups
→ 959 return cy_op.cython_operation(
960 values=values,
961 axis=axis,
962 min_count=min_count,
963 comp_ids=ids,
964 ngroups=ngroups,
965 **kwargs,
966 )

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/ops.py:657, in WrappedCythonOp.cython_operation(self, values, axis, min_count, comp_ids, ngroups, **kwargs)
649 return self._ea_wrap_cython_operation(
650 values,
651 min_count=min_count,
(…)
654 **kwargs,
655 )
→ 657 return self._cython_op_ndim_compat(
658 values,
659 min_count=min_count,
660 ngroups=ngroups,
661 comp_ids=comp_ids,
662 mask=None,
663 **kwargs,
664 )

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/ops.py:497, in WrappedCythonOp._cython_op_ndim_compat(self, values, min_count, ngroups, comp_ids, mask, result_mask, **kwargs)
495 return res.T
→ 497 return self._call_cython_op(
498 values,
499 min_count=min_count,
500 ngroups=ngroups,
501 comp_ids=comp_ids,
502 mask=mask,
503 result_mask=result_mask,
504 **kwargs,
505 )

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/ops.py:541, in WrappedCythonOp._call_cython_op(self, values, min_count, ngroups, comp_ids, mask, result_mask, **kwargs)
540 out_shape = self._get_output_shape(ngroups, values)
→ 541 func = self._get_cython_function(self.kind, self.how, values.dtype, is_numeric)
542 values = self._get_cython_vals(values)

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/ops.py:173, in WrappedCythonOp._get_cython_function(cls, kind, how, dtype, is_numeric)
171 if “object” not in f.signatures:
172 # raise NotImplementedError here rather than TypeError later
→ 173 raise NotImplementedError(
174 f"function is not implemented for this dtype: "
175 f"[how->{how},dtype->{dtype_str}]"
176 )
177 return f

NotImplementedError: function is not implemented for this dtype: [how->mean,dtype->object]

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)
File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:1692, in _ensure_numeric(x)
1691 try:
→ 1692 x = float(x)
1693 except (TypeError, ValueError):
1694 # e.g. “1+1j” or “foo”

ValueError: could not convert string to float: ‘00:1000:2000:3000:4000:[snip]:4023:50’

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)
File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:1696, in _ensure_numeric(x)
1695 try:
→ 1696 x = complex(x)
1697 except ValueError as err:
1698 # e.g. “foo”

ValueError: complex() arg is a malformed string

The above exception was the direct cause of the following exception:

TypeError Traceback (most recent call last)
Cell In[8], line 2
1 # Keep the data for a 10-turbine wind farm
----> 2 top_turbines = utils.top_n_turbines(raw_data, 10)

File ~/dev/Coursera/AiForGood/2-AI_and_ClimateChange/Week2/1-exploring/utils.py:52, in top_n_turbines(raw_data, n)
39 def top_n_turbines(
40 raw_data: pd.core.frame.DataFrame, n: int
41 ) → pd.core.frame.DataFrame:
42 “”“Keeps only the top n turbines that produced more energy on average.
43
44 Args:
(…)
49 pd.core.frame.DataFrame: The dataset with only the data from the top n turbines.
50 “””
51 sorted_patv_by_turbine = (
—> 52 raw_data.groupby(“TurbID”).mean()[“Patv (kW)”].sort_values(ascending=False)
53 )
55 top_turbines = list(sorted_patv_by_turbine.index)[:n]
57 print(
58 f"Original data has {len(raw_data)} rows from {len(raw_data.TurbID.unique())} turbines.\n"
59 )

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/groupby.py:1855, in GroupBy.mean(self, numeric_only, engine, engine_kwargs)
1853 return self._numba_agg_general(sliding_mean, engine_kwargs)
1854 else:
→ 1855 result = self._cython_agg_general(
1856 “mean”,
1857 alt=lambda x: Series(x).mean(numeric_only=numeric_only),
1858 numeric_only=numeric_only,
1859 )
1860 return result.__finalize__(self.obj, method="groupby")

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/groupby.py:1507, in GroupBy._cython_agg_general(self, how, alt, numeric_only, min_count, **kwargs)
1503 result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
1505 return result
→ 1507 new_mgr = data.grouped_reduce(array_func)
1508 res = self._wrap_agged_manager(new_mgr)
1509 out = self._wrap_aggregated_output(res)

File /usr/local/lib/python3.11/site-packages/pandas/core/internals/managers.py:1503, in BlockManager.grouped_reduce(self, func)
1499 if blk.is_object:
1500 # split on object-dtype blocks bc some columns may raise
1501 # while others do not.
1502 for sb in blk._split():
→ 1503 applied = sb.apply(func)
1504 result_blocks = extend_blocks(applied, result_blocks)
1505 else:

File /usr/local/lib/python3.11/site-packages/pandas/core/internals/blocks.py:329, in Block.apply(self, func, **kwargs)
323 @final
324 def apply(self, func, **kwargs) → list[Block]:
325 “”"
326 apply the function to my values; return a block if we are not
327 one
328 “”"
→ 329 result = func(self.values, **kwargs)
331 return self._split_op_result(result)

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/groupby.py:1503, in GroupBy._cython_agg_general.<locals>.array_func(values)
1490 result = self.grouper._cython_operation(
1491 “aggregate”,
1492 values,
(…)
1496 **kwargs,
1497 )
1498 except NotImplementedError:
1499 # generally if we have numeric_only=False
1500 # and non-applicable functions
1501 # try to python agg
1502 # TODO: shouldn’t min_count matter?
→ 1503 result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
1505 return result

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/groupby.py:1457, in GroupBy._agg_py_fallback(self, values, ndim, alt)
1452 ser = df.iloc[:, 0]
1454 # We do not get here with UDFs, so we know that our dtype
1455 # should always be preserved by the implemented aggregations
1456 # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype?
→ 1457 res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)
1459 if isinstance(values, Categorical):
1460 # Because we only get here with known dtype-preserving
1461 # reductions, we cast back to Categorical.
1462 # TODO: if we ever get “rank” working, exclude it here.
1463 res_values = type(values)._from_sequence(res_values, dtype=values.dtype)

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/ops.py:994, in BaseGrouper.agg_series(self, obj, func, preserve_dtype)
987 if len(obj) > 0 and not isinstance(obj._values, np.ndarray):
988 # we can preserve a little bit more aggressively with EA dtype
989 # because maybe_cast_pointwise_result will do a try/except
990 # with _from_sequence. NB we are assuming here that _from_sequence
991 # is sufficiently strict that it casts appropriately.
992 preserve_dtype = True
→ 994 result = self._aggregate_series_pure_python(obj, func)
996 npvalues = lib.maybe_convert_objects(result, try_float=False)
997 if preserve_dtype:

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/ops.py:1015, in BaseGrouper._aggregate_series_pure_python(self, obj, func)
1012 splitter = self._get_splitter(obj, axis=0)
1014 for i, group in enumerate(splitter):
→ 1015 res = func(group)
1016 res = libreduction.extract_result(res)
1018 if not initialized:
1019 # We only do this validation on the first iteration

File /usr/local/lib/python3.11/site-packages/pandas/core/groupby/groupby.py:1857, in GroupBy.mean.<locals>.<lambda>(x)
1853 return self._numba_agg_general(sliding_mean, engine_kwargs)
1854 else:
1855 result = self._cython_agg_general(
1856 “mean”,
→ 1857 alt=lambda x: Series(x).mean(numeric_only=numeric_only),
1858 numeric_only=numeric_only,
1859 )
1860 return result.__finalize__(self.obj, method="groupby")

File /usr/local/lib/python3.11/site-packages/pandas/core/generic.py:11556, in NDFrame._add_numeric_operations.<locals>.mean(self, axis, skipna, numeric_only, **kwargs)
11539 @doc(
11540 _num_doc,
11541 desc=“Return the mean of the values over the requested axis.”,
(…)
11554 **kwargs,
11555 ):

→ 11556 return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)

File /usr/local/lib/python3.11/site-packages/pandas/core/generic.py:11201, in NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)
11194 def mean(
11195 self,
11196 axis: Axis | None = 0,
(…)
11199 **kwargs,
11200 ) → Series | float:

→ 11201 return self._stat_function(
11202 “mean”, nanops.nanmean, axis, skipna, numeric_only, **kwargs
11203 )

File /usr/local/lib/python3.11/site-packages/pandas/core/generic.py:11158, in NDFrame._stat_function(self, name, func, axis, skipna, numeric_only, **kwargs)
11154 nv.validate_stat_func((), kwargs, fname=name)
11156 validate_bool_kwarg(skipna, “skipna”, none_allowed=False)

→ 11158 return self._reduce(
11159 func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
11160 )

File /usr/local/lib/python3.11/site-packages/pandas/core/series.py:4670, in Series._reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
4665 raise TypeError(
4666 f"Series.{name} does not allow {kwd_name}={numeric_only} "
4667 “with non-numeric dtypes.”
4668 )
4669 with np.errstate(all=“ignore”):
→ 4670 return op(delegate, skipna=skipna, **kwds)

File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:96, in disallow.__call__.<locals>._f(*args, **kwargs)
94 try:
95 with np.errstate(invalid=“ignore”):
—> 96 return f(*args, **kwargs)
97 except ValueError as e:
98 # we want to transform an object array
99 # ValueError message to the more typical TypeError
100 # e.g. this is normally a disallowed function on
101 # object arrays that contain strings
102 if is_object_dtype(args[0]):

File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:158, in bottleneck_switch.__call__.<locals>.f(values, axis, skipna, **kwds)
156 result = alt(values, axis=axis, skipna=skipna, **kwds)
157 else:
→ 158 result = alt(values, axis=axis, skipna=skipna, **kwds)
160 return result

File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:421, in _datetimelike_compat.<locals>.new_func(values, axis, skipna, mask, **kwargs)
418 if datetimelike and mask is None:
419 mask = isna(values)
→ 421 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
423 if datetimelike:
424 result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)

File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:727, in nanmean(values, axis, skipna, mask)
724 dtype_count = dtype
726 count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
→ 727 the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
729 if axis is not None and getattr(the_sum, “ndim”, False):
730 count = cast(np.ndarray, count)

File /usr/local/lib/python3.11/site-packages/pandas/core/nanops.py:1699, in _ensure_numeric(x)
1696 x = complex(x)
1697 except ValueError as err:
1698 # e.g. “foo”
→ 1699 raise TypeError(f"Could not convert {x} to numeric") from err
1700 return x

TypeError: Could not convert 00:1000:2000:3000:4000:[snip]:4023:50 to numeric

1 Like