Hey guys,
My MongoDB collection has a document with a key prefixed with '$$'. When I try to read the collection through the MongoDB Spark Connector (10.2.2, on Spark 3.3.4), the read fails with a Location16410 error.
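For context, the read is essentially the sketch below (the connection URI, database, and collection names are placeholders, not our real ones):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("mongo-read")
  // Placeholder URI; ours points at the same localhost instance shown in the trace
  .config("spark.mongodb.read.connection.uri", "mongodb://localhost:27017")
  .getOrCreate()

val df = spark.read
  .format("mongodb")
  .option("database", "mydb")       // placeholder database name
  .option("collection", "mycoll")   // placeholder collection name
  .load()

df.show() // any action that materializes rows triggers the failure
```

The full exception and stack trace: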
com.mongodb.MongoCommandException: Command failed with error 16410 (Location16410): 'Invalid $project :: caused by :: FieldPath field names may not start with '$'. Consider using $getField or $setField.' on server localhost:27017. The full response is {"ok": 0.0, "errmsg": "Invalid $project :: caused by :: FieldPath field names may not start with '$'. Consider using $getField or $setField.", "code": 16410, "codeName": "Location16410"}
at com.mongodb.internal.connection.ProtocolHelper.getCommandFailureException(ProtocolHelper.java:198) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.connection.InternalStreamConnection.receiveCommandMessageResponse(InternalStreamConnection.java:416) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.connection.InternalStreamConnection.sendAndReceive(InternalStreamConnection.java:340) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.connection.UsageTrackingInternalConnection.sendAndReceive(UsageTrackingInternalConnection.java:116) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.connection.DefaultConnectionPool$PooledConnection.sendAndReceive(DefaultConnectionPool.java:643) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.connection.CommandProtocolImpl.execute(CommandProtocolImpl.java:71) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.connection.DefaultServer$DefaultServerProtocolExecutor.execute(DefaultServer.java:206) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.connection.DefaultServerConnection.executeProtocol(DefaultServerConnection.java:119) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.connection.DefaultServerConnection.command(DefaultServerConnection.java:85) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.connection.DefaultServerConnection.command(DefaultServerConnection.java:75) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.connection.DefaultServer$OperationCountTrackingConnection.command(DefaultServer.java:293) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.CommandOperationHelper.createReadCommandAndExecute(CommandOperationHelper.java:233) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.CommandOperationHelper.lambda$executeRetryableRead$4(CommandOperationHelper.java:215) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.OperationHelper.lambda$withSourceAndConnection$0(OperationHelper.java:356) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.OperationHelper.withSuppliedResource(OperationHelper.java:381) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.OperationHelper.lambda$withSourceAndConnection$1(OperationHelper.java:355) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.OperationHelper.withSuppliedResource(OperationHelper.java:381) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.OperationHelper.withSourceAndConnection(OperationHelper.java:354) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.CommandOperationHelper.lambda$executeRetryableRead$5(CommandOperationHelper.java:213) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.async.function.RetryingSyncSupplier.get(RetryingSyncSupplier.java:67) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.CommandOperationHelper.executeRetryableRead(CommandOperationHelper.java:218) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.CommandOperationHelper.executeRetryableRead(CommandOperationHelper.java:199) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.AggregateOperationImpl.execute(AggregateOperationImpl.java:194) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.AggregateOperation.execute(AggregateOperation.java:150) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.internal.operation.AggregateOperation.execute(AggregateOperation.java:44) ~[mongodb-driver-core-4.8.2.jar:?]
at com.mongodb.client.internal.MongoClientDelegate$DelegateOperationExecutor.execute(MongoClientDelegate.java:191) ~[mongodb-driver-sync-4.8.2.jar:?]
at com.mongodb.client.internal.MongoIterableImpl.execute(MongoIterableImpl.java:133) ~[mongodb-driver-sync-4.8.2.jar:?]
at com.mongodb.client.internal.MongoIterableImpl.iterator(MongoIterableImpl.java:90) ~[mongodb-driver-sync-4.8.2.jar:?]
at com.mongodb.client.internal.MongoIterableImpl.cursor(MongoIterableImpl.java:95) ~[mongodb-driver-sync-4.8.2.jar:?]
at com.mongodb.spark.sql.connector.read.MongoBatchPartitionReader.getCursor(MongoBatchPartitionReader.java:108) ~[mongo-spark-connector_2.12-10.2.2.jar:?]
at com.mongodb.spark.sql.connector.read.MongoBatchPartitionReader.next(MongoBatchPartitionReader.java:72) ~[mongo-spark-connector_2.12-10.2.2.jar:?]
at org.apache.spark.sql.execution.datasources.v2.PartitionIterator.hasNext(DataSourceRDD.scala:119) ~[spark-sql_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.sql.execution.datasources.v2.MetricsIterator.hasNext(DataSourceRDD.scala:156) ~[spark-sql_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.sql.execution.datasources.v2.DataSourceRDD$$anon$1.$anonfun$hasNext$1(DataSourceRDD.scala:63) ~[spark-sql_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.sql.execution.datasources.v2.DataSourceRDD$$anon$1.$anonfun$hasNext$1$adapted(DataSourceRDD.scala:63) ~[spark-sql_2.12-3.3.4.jar:3.3.4]
at scala.Option.exists(Option.scala:376) ~[scala-library-2.12.18.jar:?]
at org.apache.spark.sql.execution.datasources.v2.DataSourceRDD$$anon$1.hasNext(DataSourceRDD.scala:63) ~[spark-sql_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.sql.execution.datasources.v2.DataSourceRDD$$anon$1.advanceToNextIter(DataSourceRDD.scala:97) ~[spark-sql_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.sql.execution.datasources.v2.DataSourceRDD$$anon$1.hasNext(DataSourceRDD.scala:63) ~[spark-sql_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37) ~[spark-core_2.12-3.3.4.jar:3.3.4]
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) ~[scala-library-2.12.18.jar:?]
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) ~[?:?]
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) ~[spark-sql_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760) ~[spark-sql_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:364) ~[spark-sql_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890) ~[spark-core_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890) ~[spark-core_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) ~[spark-core_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365) ~[spark-core_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.rdd.RDD.iterator(RDD.scala:329) ~[spark-core_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) ~[spark-core_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.scheduler.Task.run(Task.scala:136) ~[spark-core_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548) ~[spark-core_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504) ~[spark-core_2.12-3.3.4.jar:3.3.4]
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551) ~[spark-core_2.12-3.3.4.jar:3.3.4]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) ~[?:?]
at java.lang.Thread.run(Thread.java:834) ~[?:?]
How can we handle such data, or skip those documents entirely?
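The error text itself suggests $getField / $setField. Based on that hint, one idea is to strip the offending key server-side with the connector's aggregation.pipeline read option, so the key never reaches the connector's generated $project. A rough, untested sketch ("$$badKey" is a placeholder for the real key, and $unsetField needs MongoDB 5.0+):

```scala
// Untested sketch: drop the '$$'-prefixed key in an aggregation stage so it
// never appears in the connector's $project. "$$badKey" is a placeholder;
// $literal is needed because the field name itself starts with '$'.
val stripPipeline =
  """[{"$replaceWith": {"$unsetField": {
    |  "field": {"$literal": "$$badKey"},
    |  "input": "$$ROOT"
    |}}}]""".stripMargin

val cleaned = spark.read
  .format("mongodb")
  .option("database", "mydb")                     // placeholder
  .option("collection", "mycoll")                 // placeholder
  .option("aggregation.pipeline", stripPipeline)  // applied server-side before the read
  .load()
```

I'm not sure whether schema inference also applies this pipeline; if it doesn't, an explicit .schema(...) that omits the key might be needed as well. Is this the recommended approach, or is there a cleaner way?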