diff --git a/opm/autodiff/StandardWell.hpp b/opm/autodiff/StandardWell.hpp
index c81c47ce3..3dfdab135 100644
--- a/opm/autodiff/StandardWell.hpp
+++ b/opm/autodiff/StandardWell.hpp
@@ -48,6 +48,7 @@ namespace Opm
         using WellState = typename WellInterface<TypeTag>::WellState;
         using IntensiveQuantities = typename WellInterface<TypeTag>::IntensiveQuantities;
         using FluidSystem = typename WellInterface<TypeTag>::FluidSystem;
+        using MaterialLaw = typename WellInterface<TypeTag>::MaterialLaw;
 
         // the positions of the primary variables for StandardWell
         // there are three primary variables, the second and the third ones are F_w and F_g
@@ -99,11 +100,6 @@ namespace Opm
         virtual const std::vector<double>& perfPressureDiffs() const;
         virtual std::vector<double>& perfPressureDiffs();
 
-        virtual void assembleWellEq(Simulator& ebos_simulator,
-                                    const double dt,
-                                    WellState& well_state,
-                                    bool only_wells);
-
         virtual void setWellVariables(const WellState& well_state);
 
         EvalWell wellVolumeFractionScaled(const int phase) const;
@@ -120,26 +116,52 @@ namespace Opm
                              const double Tw, const EvalWell& bhp, const double& cdp,
                              const bool& allow_cf, std::vector<EvalWell>& cq_s) const;
 
+        void assembleWellEq(Simulator& ebosSimulator,
+                            const double dt,
+                            WellState& well_state,
+                            bool only_wells);
+
+        bool allow_cross_flow(const Simulator& ebosSimulator) const;
+
+        void getMobility(const Simulator& ebosSimulator,
+                         const int perf,
+                         std::vector<EvalWell>& mob) const;
+
+        // TODO: the parameters need to be optimized/adjusted
+        void init(const PhaseUsage* phase_usage_arg,
+                  const std::vector<bool>* active_arg,
+                  const VFPProperties* vfp_properties_arg,
+                  const double gravity_arg,
+                  const int num_cells);
+
         using WellInterface<TypeTag>::phaseUsage;
         using WellInterface<TypeTag>::active;
         using WellInterface<TypeTag>::numberOfPerforations;
+        using WellInterface<TypeTag>::wellCells;
+        using WellInterface<TypeTag>::saturationTableNumber;
         using WellInterface<TypeTag>::indexOfWell;
         using WellInterface<TypeTag>::name;
         using WellInterface<TypeTag>::wellType;
+        using WellInterface<TypeTag>::allowCrossFlow;
         using WellInterface<TypeTag>::wellControls;
         using WellInterface<TypeTag>::compFrac;
         using WellInterface<TypeTag>::numberOfPhases;
         using WellInterface<TypeTag>::perfDepth;
         using WellInterface<TypeTag>::flowToEbosPvIdx;
         using WellInterface<TypeTag>::flowPhaseToEbosPhaseIdx;
+        using WellInterface<TypeTag>::flowPhaseToEbosCompIdx;
         using WellInterface<TypeTag>::numComponents;
         using WellInterface<TypeTag>::numPhases;
         using WellInterface<TypeTag>::has_solvent;
+        using WellInterface<TypeTag>::wellIndex;
 
     protected:
 
+        void localInvert(Mat& istlA) const;
+
         using WellInterface<TypeTag>::vfp_properties_;
         using WellInterface<TypeTag>::gravity_;
+        using WellInterface<TypeTag>::well_efficiency_factor_;
 
         // densities of the fluid in each perforation
         std::vector<double> perf_densities_;
@@ -149,14 +171,20 @@ namespace Opm
         // TODO: probably, they should be moved to the WellInterface, when
         // we decide the template paramters.
         // two off-diagonal matrices
-        Mat dune_B_;
-        Mat dune_C_;
+        Mat duneB_;
+        Mat duneC_;
         // diagonal matrix for the well
-        Mat inv_dune_D_;
+        Mat invDuneD_;
 
-        BVector res_well_;
+        // several vector used in the matrix calculation
+        mutable BVector Cx_;
+        mutable BVector invDrw_;
+        mutable BVector scaleAddRes_;
+
+        BVector resWell_;
 
         std::vector<EvalWell> well_variables_;
+        std::vector<double> F0_;
 
         // TODO: this function should be moved to the base class.
         // while it faces chanllenges for MSWell later, since the calculation of bhp
diff --git a/opm/autodiff/StandardWell_impl.hpp b/opm/autodiff/StandardWell_impl.hpp
index 2a6f388aa..7c4493a1a 100644
--- a/opm/autodiff/StandardWell_impl.hpp
+++ b/opm/autodiff/StandardWell_impl.hpp
@@ -29,9 +29,62 @@ namespace Opm
     , perf_pressure_diffs_(numberOfPerforations())
     , well_variables_(numWellEq) // the number of the primary variables
     {
-        dune_B_.setBuildMode( Mat::row_wise );
-        dune_C_.setBuildMode( Mat::row_wise );
-        inv_dune_D_.setBuildMode( Mat::row_wise );
+        duneB_.setBuildMode( Mat::row_wise );
+        duneC_.setBuildMode( Mat::row_wise );
+        invDuneD_.setBuildMode( Mat::row_wise );
+    }
+
+
+
+
+
+    template<typename TypeTag>
+    void
+    StandardWell<TypeTag>::
+    init(const PhaseUsage* phase_usage_arg,
+         const std::vector<bool>* active_arg,
+         const VFPProperties* vfp_properties_arg,
+         const double gravity_arg,
+         const int num_cells)
+    {
+        WellInterface<TypeTag>(phase_usage_arg, active_arg,
+                               vfp_properties_arg, gravity_arg, num_cells);
+
+        // setup sparsity pattern for the matrices
+        // TODO: C and B are opposite compared with the notations used in the paper.
+        //[A B^T    [x    =  [ res
+        // C D] x_well]      res_well]
+        // set the size of the matrices
+        invDuneD_.setSize(1, 1, 1);
+        duneC_.setSize(1, num_cells, numberOfPerforations());
+        duneB_.setSize(1, num_cells, numberOfPerforations());
+
+        for (auto row=invDuneD_.createbegin(), end = invDuneD_.createend(); row!=end; ++row) {
+            // Add nonzeros for diagonal
+            row.insert(row.index());
+        }
+
+        for (auto row = duneC_.createbegin(), end = duneC_.createend(); row!=end; ++row) {
+            // Add nonzeros for diagonal
+            for (int perf = 0 ; perf < numberOfPerforations(); ++perf) {
+                const int cell_idx = wellCells()[perf];
+                row.insert(cell_idx);
+            }
+        }
+
+        // make the B^T matrix
+        for (auto row = duneB_.createbegin(), end = duneB_.createend(); row!=end; ++row) {
+            for (int perf = 0; perf < numberOfPerforations(); ++perf) {
+                const int cell_idx = wellCells()[perf];
+                row.insert(cell_idx);
+            }
+        }
+
+        resWell_.resize(1);
+
+        // resize temporary class variables
+        Cx_.resize( duneC_.N() );
+        invDrw_.resize( invDuneD_.N() );
     }
 
 
@@ -86,20 +139,6 @@ namespace Opm
 
 
 
-    template<typename TypeTag>
-    void
-    StandardWell<TypeTag>::
-    assembleWellEq(Simulator& ebos_simulator,
-                   const double dt,
-                   WellState& well_state,
-                   bool only_wells)
-    {
-    }
-
-
-
-
-
     template<typename TypeTag>
     void StandardWell<TypeTag>::
     setWellVariables(const WellState& well_state)
@@ -545,4 +584,211 @@ namespace Opm
         }
     }
 
+
+
+
+
+    template<typename TypeTag>
+    void
+    StandardWell<TypeTag>::
+    assembleWellEq(Simulator& ebosSimulator,
+                   const double dt,
+                   WellState& well_state,
+                   bool only_wells)
+    {
+        // TODO: accessing well_state information is the only place to use nw at the moment
+        const int nw = well_state.bhp().size();
+        const int numComp = numComponents();
+        const int np = numPhases();
+
+        // clear all entries
+        duneB_ = 0.0;
+        duneC_ = 0.0;
+        invDuneD_ = 0.0;
+        resWell_ = 0.0;
+
+        auto& ebosJac = ebosSimulator.model().linearizer().matrix();
+        auto& ebosResid = ebosSimulator.model().linearizer().residual();
+
+        // TODO: it probably can be static member for StandardWell
+        const double volume = 0.002831684659200; // 0.1 cu ft;
+
+        const bool allow_cf = allow_cross_flow(ebosSimulator);
+
+        const EvalWell& bhp = getBhp();
+
+        for (int perf = 0; perf < numberOfPerforations(); ++perf) {
+
+            const int cell_idx = wellCells()[perf];
+            const auto& intQuants = *(ebosSimulator.model().cachedIntensiveQuantities(cell_idx, /*timeIdx=*/ 0));
+            std::vector<EvalWell> cq_s(numComp,0.0);
+            std::vector<EvalWell> mob(numComp, 0.0);
+            getMobility(ebosSimulator, perf, mob);
+            computePerfRate(intQuants, mob, wellIndex()[perf], bhp, perfPressureDiffs()[perf], allow_cf, cq_s);
+
+                for (int componentIdx = 0; componentIdx < numComp; ++componentIdx) {
+                    // the cq_s entering mass balance equations need to consider the efficiency factors.
+                    const EvalWell cq_s_effective = cq_s[componentIdx] * well_efficiency_factor_;
+
+                    if (!only_wells) {
+                        // subtract sum of component fluxes in the reservoir equation.
+                        // need to consider the efficiency factor
+                        ebosResid[cell_idx][flowPhaseToEbosCompIdx(componentIdx)] -= cq_s_effective.value();
+                    }
+
+                    // subtract sum of phase fluxes in the well equations.
+                    resWell_[0][componentIdx] -= cq_s[componentIdx].value();
+
+                    // assemble the jacobians
+                    for (int pvIdx = 0; pvIdx < numWellEq; ++pvIdx) {
+                        if (!only_wells) {
+                            // also need to consider the efficiency factor when manipulating the jacobians.
+                            ebosJac[cell_idx][cell_idx][flowPhaseToEbosCompIdx(componentIdx)][flowToEbosPvIdx(pvIdx)] -= cq_s_effective.derivative(pvIdx);
+                            duneB_[0][cell_idx][pvIdx][flowPhaseToEbosCompIdx(componentIdx)] -= cq_s_effective.derivative(pvIdx+numEq); // intput in transformed matrix
+                            duneC_[0][cell_idx][componentIdx][flowToEbosPvIdx(pvIdx)] -= cq_s_effective.derivative(pvIdx);
+                        }
+                        invDuneD_[0][0][componentIdx][pvIdx] -= cq_s[componentIdx].derivative(pvIdx+numEq);
+                    }
+
+                    // add trivial equation for 2p cases (Only support water + oil)
+                    if (numComp == 2) {
+                        assert(!active()[ Gas ]);
+                        invDuneD_[0][0][Gas][Gas] = 1.0;
+                    }
+
+                    // Store the perforation phase flux for later usage.
+                    if (componentIdx == solventCompIdx) {// if (flowPhaseToEbosCompIdx(componentIdx) == Solvent)
+                        well_state.perfRateSolvent()[perf] = cq_s[componentIdx].value();
+                    } else {
+                        well_state.perfPhaseRates()[perf*np + componentIdx] = cq_s[componentIdx].value();
+                    }
+                }
+
+                // Store the perforation pressure for later usage.
+                well_state.perfPress()[perf] = well_state.bhp()[indexOfWell()] + perfPressureDiffs()[perf];
+            }
+
+            // add vol * dF/dt + Q to the well equations;
+            for (int componentIdx = 0; componentIdx < numComp; ++componentIdx) {
+                // TODO: the F0_ here is not initialized yet here, which should happen in the first iteration, so it should happen in the assemble function
+                EvalWell resWell_loc = (wellSurfaceVolumeFraction(componentIdx) - F0_[componentIdx]) * volume / dt;
+                resWell_loc += getQs(componentIdx);
+                for (int pvIdx = 0; pvIdx < numWellEq; ++pvIdx) {
+                    invDuneD_[0][0][componentIdx][pvIdx] += resWell_loc.derivative(pvIdx+numEq);
+                }
+                resWell_[0][componentIdx] += resWell_loc.value();
+            }
+
+        // do the local inversion of D.
+        localInvert( invDuneD_ );
+    }
+
+
+
+
+
+    template<typename TypeTag>
+    bool
+    StandardWell<TypeTag>::
+    allow_cross_flow(const Simulator& ebosSimulator) const
+    {
+        if (allowCrossFlow()) {
+            return true;
+        }
+
+        // TODO: investigate the justification of the following situation
+
+        // check for special case where all perforations have cross flow
+        // then the wells must allow for cross flow
+        for (int perf = 0; perf < numberOfPerforations(); ++perf) {
+            const int cell_idx = wellCells()[perf];
+            const auto& intQuants = *(ebosSimulator.model().cachedIntensiveQuantities(cell_idx, /*timeIdx=*/0));
+            const auto& fs = intQuants.fluidState();
+            EvalWell pressure = extendEval(fs.pressure(FluidSystem::oilPhaseIdx));
+            EvalWell bhp = getBhp();
+
+            // Pressure drawdown (also used to determine direction of flow)
+            EvalWell well_pressure = bhp + perfPressureDiffs()[perf];
+            EvalWell drawdown = pressure - well_pressure;
+
+            if (drawdown.value() < 0 && wellType() == INJECTOR)  {
+                return false;
+            }
+
+            if (drawdown.value() > 0 && wellType() == PRODUCER)  {
+                return false;
+            }
+        }
+        return true;
+    }
+
+
+
+
+
+    template<typename TypeTag>
+    void
+    StandardWell<TypeTag>::
+    getMobility(const Simulator& ebosSimulator,
+                const int perf,
+                std::vector<EvalWell>& mob) const
+    {
+        const int np = numberOfPhases();
+        const int cell_idx = wellCells()[perf];
+        assert (int(mob.size()) == numComponents());
+        const auto& intQuants = *(ebosSimulator.model().cachedIntensiveQuantities(cell_idx, /*timeIdx=*/0));
+        const auto& materialLawManager = ebosSimulator.problem().materialLawManager();
+
+        // either use mobility of the perforation cell or calcualte its own
+        // based on passing the saturation table index
+        const int satid = saturationTableNumber()[perf] - 1;
+        const int satid_elem = materialLawManager->satnumRegionIdx(cell_idx);
+        if( satid == satid_elem ) { // the same saturation number is used. i.e. just use the mobilty from the cell
+
+            for (int phase = 0; phase < np; ++phase) {
+                int ebosPhaseIdx = flowPhaseToEbosPhaseIdx(phase);
+                mob[phase] = extendEval(intQuants.mobility(ebosPhaseIdx));
+            }
+            if (has_solvent) {
+                mob[solventCompIdx] = extendEval(intQuants.solventMobility());
+            }
+        } else {
+
+            const auto& paramsCell = materialLawManager->connectionMaterialLawParams(satid, cell_idx);
+            Eval relativePerms[3] = { 0.0, 0.0, 0.0 };
+            MaterialLaw::relativePermeabilities(relativePerms, paramsCell, intQuants.fluidState());
+
+            // reset the satnumvalue back to original
+            materialLawManager->connectionMaterialLawParams(satid_elem, cell_idx);
+
+            // compute the mobility
+            for (int phase = 0; phase < np; ++phase) {
+                int ebosPhaseIdx = flowPhaseToEbosPhaseIdx(phase);
+                mob[phase] = extendEval(relativePerms[ebosPhaseIdx] / intQuants.fluidState().viscosity(ebosPhaseIdx));
+            }
+
+            // this may not work if viscosity and relperms has been modified?
+            if (has_solvent) {
+                OPM_THROW(std::runtime_error, "individual mobility for wells does not work in combination with solvent");
+            }
+        }
+    }
+
+
+
+
+
+    template<typename TypeTag>
+    void
+    StandardWell<TypeTag>::
+    localInvert(Mat& istlA) const
+    {
+        for (auto row = istlA.begin(), rowend = istlA.end(); row != rowend; ++row ) {
+            for (auto col = row->begin(), colend = row->end(); col != colend; ++col ) {
+                //std::cout << (*col) << std::endl;
+                (*col).invert();
+            }
+        }
+    }
+
 }
diff --git a/opm/autodiff/StandardWellsDense_impl.hpp b/opm/autodiff/StandardWellsDense_impl.hpp
index 5b390236b..6d1c32eaf 100644
--- a/opm/autodiff/StandardWellsDense_impl.hpp
+++ b/opm/autodiff/StandardWellsDense_impl.hpp
@@ -128,6 +128,14 @@ namespace Opm {
             }
         }
 
+        // do the initialization work
+
+        // do the initialization for all the wells
+        // TODO: to see whether we can postpone of the intialization of the well containers to
+        // optimize the usage of the following several member variables
+        for (auto& well : well_container_) {
+            well->init(&phase_usage_, &active_, vfp_properties_, gravity_, nc);
+        }
     }
 
 
diff --git a/opm/autodiff/WellInterface.hpp b/opm/autodiff/WellInterface.hpp
index db12d0cfc..a44a9fce0 100644
--- a/opm/autodiff/WellInterface.hpp
+++ b/opm/autodiff/WellInterface.hpp
@@ -58,6 +58,7 @@ namespace Opm
         typedef typename GET_PROP_TYPE(TypeTag, FluidSystem) FluidSystem;
         typedef typename GET_PROP_TYPE(TypeTag, Indices) BlackoilIndices;
         typedef typename GET_PROP_TYPE(TypeTag, IntensiveQuantities) IntensiveQuantities;
+        typedef typename GET_PROP_TYPE(TypeTag, MaterialLaw) MaterialLaw;
 
         static const int solventCompIdx = 3; //TODO get this from ebos
         static const bool has_solvent = GET_PROP_VALUE(TypeTag, EnableSolvent);
@@ -107,16 +108,12 @@ namespace Opm
         virtual const std::vector<double>& perfPressureDiffs() const = 0;
         virtual std::vector<double>& perfPressureDiffs() = 0;
 
-
-        virtual void assembleWellEq(Simulator& ebos_simulator,
-                                    const double dt,
-                                    WellState& well_state,
-                                    bool only_wells) = 0;
-
+        // TODO: the parameters need to be optimized/adjusted
         void init(const PhaseUsage* phase_usage_arg,
                   const std::vector<bool>* active_arg,
                   const VFPProperties* vfp_properties_arg,
-                  const double gravity_arg);
+                  const double gravity_arg,
+                  const int num_cells);
 
         // TODO: temporary
         virtual void setWellVariables(const WellState& well_state) = 0;
@@ -135,6 +132,11 @@ namespace Opm
 
         int numComponents() const;
 
+        bool allowCrossFlow() const;
+
+        // TODO: for this kind of function, maybe can make a function with parameter perf
+        const std::vector<int>& saturationTableNumber() const;
+
     protected:
         // TODO: some variables shared by all the wells should be made static
         // well name
@@ -147,6 +149,9 @@ namespace Opm
         // INJECTOR or PRODUCER
         enum WellType well_type_;
 
+        // whether the well allows crossflow
+        bool allow_cf_;
+
         // number of phases
         int number_of_phases_;
 
@@ -167,9 +172,14 @@ namespace Opm
         // depth for each perforation
         std::vector<double> perf_depth_;
 
+        double well_efficiency_factor_;
+
         // cell index for each well perforation
         std::vector<int> well_cell_;
 
+        // saturation table nubmer for each well perforation
+        std::vector<int> saturation_table_number_;
+
         const PhaseUsage* phase_usage_;
 
         const std::vector<bool>* active_;
diff --git a/opm/autodiff/WellInterface_impl.hpp b/opm/autodiff/WellInterface_impl.hpp
index 9ea094541..4948ba944 100644
--- a/opm/autodiff/WellInterface_impl.hpp
+++ b/opm/autodiff/WellInterface_impl.hpp
@@ -47,6 +47,7 @@ namespace Opm
         name_ = well_name;
         index_of_well_ = index_well;
         well_type_ = wells->type[index_well];
+        allow_cf_ = wells->allow_cf[index_well];
         number_of_phases_ = wells->number_of_phases;
 
         // copying the comp_frac
@@ -75,6 +76,12 @@ namespace Opm
                       wells->WI + perf_index_end,
                       well_index_.begin() );
 
+            saturation_table_number_.resize(number_of_perforations_);
+            std::copy(wells->sat_table_id + perf_index_begin,
+                      wells->sat_table_id + perf_index_end,
+                      saturation_table_number_.begin() );
+
+
             // TODO: not sure about the processing of depth for perforations here
             // Will revisit here later. There are different ways and the definition for different wells
             // can be different, it is possible that we need to remove this from the WellInterface
@@ -84,6 +91,9 @@ namespace Opm
                 perf_depth_[i] = completion_set.get(i).getCenterDepth();
             }
         }
+
+        well_efficiency_factor_ = 1.0;
+        // TODO: need to calculate based on wellCollections, or it should happen in the Well Model side.
     }
 
 
@@ -96,7 +106,8 @@ namespace Opm
     init(const PhaseUsage* phase_usage_arg,
          const std::vector<bool>* active_arg,
          const VFPProperties* vfp_properties_arg,
-         const double gravity_arg)
+         const double gravity_arg,
+         const int /* num_cells */)
     {
         phase_usage_ = phase_usage_arg;
         active_ = active_arg;
@@ -179,6 +190,18 @@ namespace Opm
 
 
 
+    template<typename TypeTag>
+    const std::vector<int>&
+    WellInterface<TypeTag>::
+    saturationTableNumber() const
+    {
+        return saturation_table_number_;
+    }
+
+
+
+
+
     template<typename TypeTag>
     int
     WellInterface<TypeTag>::